# Notebook setup: imports, IPython extensions and shared client objects.
# The `%...` lines are IPython magics, so this only runs inside IPython/Jupyter.
from __future__ import print_function
import os.path
import dalmatian as dm
import pandas as pd
import sys
# Put the directory two levels up at the front of the import path;
# presumably that is where the JKBio package lives -- TODO confirm.
sys.path.insert(0, '../../')
#import Datanalytics as da
from JKBio import TerraFunction as terra
# autoreload mode 2: reload all modules before executing each cell, so edits
# to local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
from JKBio import Helper as h
import pickle
from taigapy import TaigaClient
# Single Taiga client instance, bound to `tc` at module level.
tc = TaigaClient()
import numpy as np
import itertools
# NOTE(review): wildcard import -- it is the only visible source of
# `output_notebook` called below; other names it injects are not visible here.
from bokeh.plotting import *
from bokeh.models import HoverTool
# Presumably bokeh's output_notebook (from the wildcard import above), which
# directs bokeh plots to render inline in notebook cells -- confirm.
output_notebook()
import matplotlib.pyplot as plt
# Load the rpy2 IPython extension (provides the %R / %%R magics).
%load_ext rpy2.ipython
import seaborn as sns
import gseapy
# NOTE(review): this imports from `JKBio.helper` while an earlier line imports
# `Helper` from `JKBio`; the two differ only in case -- confirm both resolve.
from JKBio.helper import pyDESeq2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import AgglomerativeClustering
from sklearn.manifold import MDS, TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
! gsutil mv gs://transfer-amlproject/*MP7624* gs://transfer-amlproject/RNPv2/
Copying gs://transfer-amlproject/20200304_10_MP7624_S10_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_10_MP7624_S10_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_10_MP7624_S10_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_10_MP7624_S10_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_11_MP7624_S11_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_11_MP7624_S11_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_11_MP7624_S11_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_11_MP7624_S11_R2_001.fastq.gz... ==> NOTE: You are performing a sequence of gsutil operations that may run significantly faster if you instead use gsutil -m cp ... Please see the -m section under "gsutil help options" for further information about when gsutil -m can be advantageous. Copying gs://transfer-amlproject/20200304_12_MP7624_S12_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_12_MP7624_S12_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_12_MP7624_S12_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_12_MP7624_S12_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_13_MP7624_S13_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_13_MP7624_S13_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_13_MP7624_S13_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_13_MP7624_S13_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_14_MP7624_S14_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_14_MP7624_S14_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_14_MP7624_S14_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_14_MP7624_S14_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_15_MP7624_S15_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_15_MP7624_S15_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_15_MP7624_S15_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_15_MP7624_S15_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_16_MP7624_S16_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_16_MP7624_S16_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_16_MP7624_S16_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_16_MP7624_S16_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_17_MP7624_S17_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_17_MP7624_S17_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_17_MP7624_S17_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_17_MP7624_S17_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_18_MP7624_S18_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_18_MP7624_S18_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_18_MP7624_S18_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_18_MP7624_S18_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_19_MP7624_S19_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_19_MP7624_S19_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_19_MP7624_S19_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_19_MP7624_S19_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_1_MP7624_S1_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_1_MP7624_S1_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_1_MP7624_S1_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_1_MP7624_S1_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_20_MP7624_S20_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_20_MP7624_S20_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_20_MP7624_S20_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_20_MP7624_S20_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_21_MP7624_S21_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_21_MP7624_S21_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_21_MP7624_S21_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_21_MP7624_S21_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_22_MP7624_S22_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_22_MP7624_S22_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_22_MP7624_S22_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_22_MP7624_S22_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_23_MP7624_S23_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_23_MP7624_S23_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_23_MP7624_S23_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_23_MP7624_S23_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_24_MP7624_S24_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_24_MP7624_S24_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_24_MP7624_S24_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_24_MP7624_S24_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_25_MP7624_S25_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_25_MP7624_S25_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_25_MP7624_S25_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_25_MP7624_S25_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_26_MP7624_S26_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_26_MP7624_S26_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_26_MP7624_S26_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_26_MP7624_S26_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_27_MP7624_S27_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_27_MP7624_S27_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_27_MP7624_S27_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_27_MP7624_S27_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_28_MP7624_S28_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_28_MP7624_S28_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_28_MP7624_S28_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_28_MP7624_S28_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_29_MP7624_S29_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_29_MP7624_S29_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_29_MP7624_S29_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_29_MP7624_S29_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_2_MP7624_S2_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_2_MP7624_S2_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_2_MP7624_S2_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_2_MP7624_S2_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_30_MP7624_S30_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_30_MP7624_S30_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_30_MP7624_S30_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_30_MP7624_S30_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_31_MP7624_S31_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_31_MP7624_S31_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_31_MP7624_S31_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_31_MP7624_S31_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_32_MP7624_S32_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_32_MP7624_S32_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_32_MP7624_S32_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_32_MP7624_S32_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_33_MP7624_S33_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_33_MP7624_S33_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_33_MP7624_S33_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_33_MP7624_S33_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_34_MP7624_S34_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_34_MP7624_S34_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_34_MP7624_S34_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_34_MP7624_S34_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_35_MP7624_S35_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_35_MP7624_S35_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_35_MP7624_S35_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_35_MP7624_S35_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_36_MP7624_S36_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_36_MP7624_S36_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_36_MP7624_S36_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_36_MP7624_S36_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_37_MP7624_S37_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_37_MP7624_S37_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_37_MP7624_S37_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_37_MP7624_S37_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_38_MP7624_S38_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_38_MP7624_S38_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_38_MP7624_S38_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_38_MP7624_S38_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_39_MP7624_S39_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_39_MP7624_S39_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_39_MP7624_S39_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_39_MP7624_S39_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_3_MP7624_S3_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_3_MP7624_S3_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_3_MP7624_S3_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_3_MP7624_S3_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_40_MP7624_S40_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_40_MP7624_S40_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_40_MP7624_S40_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_40_MP7624_S40_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_41_MP7624_S41_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_41_MP7624_S41_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_41_MP7624_S41_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_41_MP7624_S41_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_42_MP7624_S42_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_42_MP7624_S42_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_42_MP7624_S42_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_42_MP7624_S42_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_43_MP7624_S43_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_43_MP7624_S43_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_43_MP7624_S43_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_43_MP7624_S43_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_44_MP7624_S44_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_44_MP7624_S44_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_44_MP7624_S44_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_44_MP7624_S44_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_45_MP7624_S45_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_45_MP7624_S45_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_45_MP7624_S45_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_45_MP7624_S45_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_46_MP7624_S46_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_46_MP7624_S46_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_46_MP7624_S46_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_46_MP7624_S46_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_47_MP7624_S47_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_47_MP7624_S47_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_47_MP7624_S47_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_47_MP7624_S47_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_48_MP7624_S48_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_48_MP7624_S48_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_48_MP7624_S48_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_48_MP7624_S48_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_49_MP7624_S49_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_49_MP7624_S49_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_49_MP7624_S49_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_49_MP7624_S49_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_4_MP7624_S4_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_4_MP7624_S4_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_4_MP7624_S4_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_4_MP7624_S4_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_50_MP7624_S50_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_50_MP7624_S50_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_50_MP7624_S50_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_50_MP7624_S50_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_51_MP7624_S51_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_51_MP7624_S51_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_51_MP7624_S51_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_51_MP7624_S51_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_52_MP7624_S52_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_52_MP7624_S52_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_52_MP7624_S52_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_52_MP7624_S52_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_53_MP7624_S53_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_53_MP7624_S53_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_53_MP7624_S53_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_53_MP7624_S53_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_54_MP7624_S54_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_54_MP7624_S54_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_54_MP7624_S54_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_54_MP7624_S54_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_55_MP7624_S55_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_55_MP7624_S55_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_55_MP7624_S55_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_55_MP7624_S55_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_56_MP7624_S56_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_56_MP7624_S56_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_56_MP7624_S56_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_56_MP7624_S56_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_57_MP7624_S57_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_57_MP7624_S57_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_57_MP7624_S57_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_57_MP7624_S57_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_58_MP7624_S58_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_58_MP7624_S58_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_58_MP7624_S58_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_58_MP7624_S58_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_59_MP7624_S59_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_59_MP7624_S59_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_59_MP7624_S59_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_59_MP7624_S59_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_5_MP7624_S5_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_5_MP7624_S5_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_5_MP7624_S5_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_5_MP7624_S5_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_60_MP7624_S60_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_60_MP7624_S60_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_60_MP7624_S60_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_60_MP7624_S60_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_61_MP7624_S61_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_61_MP7624_S61_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_61_MP7624_S61_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_61_MP7624_S61_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_62_MP7624_S62_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_62_MP7624_S62_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_62_MP7624_S62_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_62_MP7624_S62_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_63_MP7624_S63_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_63_MP7624_S63_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_63_MP7624_S63_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_63_MP7624_S63_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_64_MP7624_S64_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_64_MP7624_S64_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_64_MP7624_S64_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_64_MP7624_S64_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_65_MP7624_S65_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_65_MP7624_S65_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_65_MP7624_S65_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_65_MP7624_S65_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_66_MP7624_S66_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_66_MP7624_S66_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_66_MP7624_S66_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_66_MP7624_S66_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_67_MP7624_S67_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_67_MP7624_S67_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_67_MP7624_S67_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_67_MP7624_S67_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_68_MP7624_S68_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_68_MP7624_S68_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_68_MP7624_S68_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_68_MP7624_S68_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_69_MP7624_S69_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_69_MP7624_S69_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_69_MP7624_S69_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_69_MP7624_S69_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_6_MP7624_S6_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_6_MP7624_S6_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_6_MP7624_S6_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_6_MP7624_S6_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_7_MP7624_S7_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_7_MP7624_S7_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_7_MP7624_S7_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_7_MP7624_S7_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_8_MP7624_S8_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_8_MP7624_S8_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_8_MP7624_S8_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_8_MP7624_S8_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_9_MP7624_S9_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_9_MP7624_S9_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_9_MP7624_S9_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_9_MP7624_S9_R2_001.fastq.gz... ==> NOTE: You are performing a sequence of gsutil operations that may run significantly faster if you instead use gsutil -m cp ... Please see the -m section under "gsutil help options" for further information about when gsutil -m can be advantageous. Operation completed over 138 objects/240.6 GiB.
# Copy the RNPv3 prefix of the transfer bucket, recursively (-r) and in
# parallel (-m), under gs://amlproject/RNA/.
# NOTE(review): the output recorded below this command lists objects under
# RNPv2/ (the prefix the earlier `gsutil mv` cell wrote to), not RNPv3/ —
# confirm which source prefix is intended before re-running.
! gsutil -m cp -r gs://transfer-amlproject/RNPv3 gs://amlproject/RNA/
Copying gs://transfer-amlproject/RNPv2/20200304_10_MP7624_S10_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_10_MP7624_S10_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_11_MP7624_S11_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_11_MP7624_S11_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_12_MP7624_S12_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_12_MP7624_S12_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_13_MP7624_S13_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_14_MP7624_S14_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_13_MP7624_S13_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_14_MP7624_S14_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_15_MP7624_S15_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_15_MP7624_S15_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_16_MP7624_S16_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_16_MP7624_S16_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_17_MP7624_S17_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_17_MP7624_S17_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_18_MP7624_S18_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_18_MP7624_S18_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_19_MP7624_S19_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_19_MP7624_S19_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_1_MP7624_S1_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_1_MP7624_S1_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_20_MP7624_S20_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_20_MP7624_S20_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_21_MP7624_S21_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_21_MP7624_S21_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_22_MP7624_S22_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_22_MP7624_S22_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_23_MP7624_S23_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_23_MP7624_S23_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_24_MP7624_S24_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_24_MP7624_S24_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_25_MP7624_S25_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_25_MP7624_S25_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_26_MP7624_S26_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_26_MP7624_S26_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_27_MP7624_S27_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_27_MP7624_S27_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_28_MP7624_S28_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_29_MP7624_S29_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_28_MP7624_S28_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_2_MP7624_S2_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_29_MP7624_S29_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_2_MP7624_S2_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_30_MP7624_S30_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_30_MP7624_S30_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_31_MP7624_S31_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_32_MP7624_S32_R1_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_31_MP7624_S31_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_33_MP7624_S33_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_32_MP7624_S32_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_33_MP7624_S33_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_34_MP7624_S34_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_34_MP7624_S34_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_35_MP7624_S35_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_48_MP7624_S48_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_35_MP7624_S35_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_36_MP7624_S36_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_36_MP7624_S36_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_37_MP7624_S37_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_38_MP7624_S38_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_37_MP7624_S37_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_3_MP7624_S3_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_38_MP7624_S38_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_39_MP7624_S39_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_39_MP7624_S39_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_40_MP7624_S40_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_3_MP7624_S3_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_40_MP7624_S40_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_42_MP7624_S42_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_41_MP7624_S41_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_47_MP7624_S47_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_44_MP7624_S44_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_41_MP7624_S41_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_42_MP7624_S42_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_43_MP7624_S43_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_45_MP7624_S45_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_43_MP7624_S43_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_44_MP7624_S44_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_45_MP7624_S45_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_46_MP7624_S46_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_46_MP7624_S46_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_49_MP7624_S49_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_47_MP7624_S47_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_49_MP7624_S49_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_4_MP7624_S4_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_51_MP7624_S51_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_4_MP7624_S4_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_48_MP7624_S48_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_51_MP7624_S51_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_50_MP7624_S50_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_50_MP7624_S50_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_52_MP7624_S52_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_53_MP7624_S53_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_52_MP7624_S52_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_53_MP7624_S53_R1_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_54_MP7624_S54_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_54_MP7624_S54_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_55_MP7624_S55_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_55_MP7624_S55_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_56_MP7624_S56_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_56_MP7624_S56_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_57_MP7624_S57_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_57_MP7624_S57_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_58_MP7624_S58_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_58_MP7624_S58_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_59_MP7624_S59_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_59_MP7624_S59_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_5_MP7624_S5_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_5_MP7624_S5_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_60_MP7624_S60_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_60_MP7624_S60_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_61_MP7624_S61_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_61_MP7624_S61_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_62_MP7624_S62_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_62_MP7624_S62_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_63_MP7624_S63_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_63_MP7624_S63_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_64_MP7624_S64_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_64_MP7624_S64_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_65_MP7624_S65_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_65_MP7624_S65_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_66_MP7624_S66_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_66_MP7624_S66_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_67_MP7624_S67_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_68_MP7624_S68_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_68_MP7624_S68_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_67_MP7624_S67_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_69_MP7624_S69_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_6_MP7624_S6_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_69_MP7624_S69_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_6_MP7624_S6_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_7_MP7624_S7_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_7_MP7624_S7_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_8_MP7624_S8_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_8_MP7624_S8_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_9_MP7624_S9_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_9_MP7624_S9_R2_001.fastq.gz [Content-Type=application/octet-stream]... \ [138/138 files][240.6 GiB/240.6 GiB] 100% Done Operation completed over 138 objects/240.6 GiB.
# List the top-level content of the amlproject bucket.
! gsutil ls gs://amlproject/
gs://amlproject/MV-4-11.bai gs://amlproject/MV-4-11.bam gs://amlproject/Chip/ gs://amlproject/RNA/ gs://amlproject/RNPv2/
# Name of the Terra sample set; the submission cells further down reuse it.
# NOTE(review): the output recorded under this cell reports a sample set
# called MAX_AML_RNPv2 -- presumably from an earlier run; confirm.
sampleset = 'RNPv3'

# Register the R1/R2 fastq pairs stored under RNPv2/ in the amlproject bucket
# as samples of the hg38_RNAseq workspace. Going by the recorded output, the
# sample id is the part of the file name located before `sep`.
terra.uploadFromFolder(
    'amlproject',
    'RNPv2/',
    'broad-firecloud-ccle/hg38_RNAseq',
    samplesetname=sampleset,
    fformat="fastqR1R2",
    sep='_MP7624'
)
please be sure you gave access to your terra email account access to this bucket
['RNPv2/20200304_10_MP7624_S10_R1_001.fastq.gz', 'RNPv2/20200304_10_MP7624_S10_R2_001.fastq.gz', 'RNPv2/20200304_11_MP7624_S11_R1_001.fastq.gz', 'RNPv2/20200304_11_MP7624_S11_R2_001.fastq.gz', 'RNPv2/20200304_12_MP7624_S12_R1_001.fastq.gz', 'RNPv2/20200304_12_MP7624_S12_R2_001.fastq.gz', 'RNPv2/20200304_13_MP7624_S13_R1_001.fastq.gz', 'RNPv2/20200304_13_MP7624_S13_R2_001.fastq.gz', 'RNPv2/20200304_14_MP7624_S14_R1_001.fastq.gz', 'RNPv2/20200304_14_MP7624_S14_R2_001.fastq.gz', 'RNPv2/20200304_15_MP7624_S15_R1_001.fastq.gz', 'RNPv2/20200304_15_MP7624_S15_R2_001.fastq.gz', 'RNPv2/20200304_16_MP7624_S16_R1_001.fastq.gz', 'RNPv2/20200304_16_MP7624_S16_R2_001.fastq.gz', 'RNPv2/20200304_17_MP7624_S17_R1_001.fastq.gz', 'RNPv2/20200304_17_MP7624_S17_R2_001.fastq.gz', 'RNPv2/20200304_18_MP7624_S18_R1_001.fastq.gz', 'RNPv2/20200304_18_MP7624_S18_R2_001.fastq.gz', 'RNPv2/20200304_19_MP7624_S19_R1_001.fastq.gz', 'RNPv2/20200304_19_MP7624_S19_R2_001.fastq.gz', 'RNPv2/20200304_1_MP7624_S1_R1_001.fastq.gz', 'RNPv2/20200304_1_MP7624_S1_R2_001.fastq.gz', 'RNPv2/20200304_20_MP7624_S20_R1_001.fastq.gz', 'RNPv2/20200304_20_MP7624_S20_R2_001.fastq.gz', 'RNPv2/20200304_21_MP7624_S21_R1_001.fastq.gz', 'RNPv2/20200304_21_MP7624_S21_R2_001.fastq.gz', 'RNPv2/20200304_22_MP7624_S22_R1_001.fastq.gz', 'RNPv2/20200304_22_MP7624_S22_R2_001.fastq.gz', 'RNPv2/20200304_23_MP7624_S23_R1_001.fastq.gz', 'RNPv2/20200304_23_MP7624_S23_R2_001.fastq.gz', 'RNPv2/20200304_24_MP7624_S24_R1_001.fastq.gz', 'RNPv2/20200304_24_MP7624_S24_R2_001.fastq.gz', 'RNPv2/20200304_25_MP7624_S25_R1_001.fastq.gz', 'RNPv2/20200304_25_MP7624_S25_R2_001.fastq.gz', 'RNPv2/20200304_26_MP7624_S26_R1_001.fastq.gz', 'RNPv2/20200304_26_MP7624_S26_R2_001.fastq.gz', 'RNPv2/20200304_27_MP7624_S27_R1_001.fastq.gz', 'RNPv2/20200304_27_MP7624_S27_R2_001.fastq.gz', 'RNPv2/20200304_28_MP7624_S28_R1_001.fastq.gz', 'RNPv2/20200304_28_MP7624_S28_R2_001.fastq.gz', 'RNPv2/20200304_29_MP7624_S29_R1_001.fastq.gz', 
'RNPv2/20200304_29_MP7624_S29_R2_001.fastq.gz', 'RNPv2/20200304_2_MP7624_S2_R1_001.fastq.gz', 'RNPv2/20200304_2_MP7624_S2_R2_001.fastq.gz', 'RNPv2/20200304_30_MP7624_S30_R1_001.fastq.gz', 'RNPv2/20200304_30_MP7624_S30_R2_001.fastq.gz', 'RNPv2/20200304_31_MP7624_S31_R1_001.fastq.gz', 'RNPv2/20200304_31_MP7624_S31_R2_001.fastq.gz', 'RNPv2/20200304_32_MP7624_S32_R1_001.fastq.gz', 'RNPv2/20200304_32_MP7624_S32_R2_001.fastq.gz', 'RNPv2/20200304_33_MP7624_S33_R1_001.fastq.gz', 'RNPv2/20200304_33_MP7624_S33_R2_001.fastq.gz', 'RNPv2/20200304_34_MP7624_S34_R1_001.fastq.gz', 'RNPv2/20200304_34_MP7624_S34_R2_001.fastq.gz', 'RNPv2/20200304_35_MP7624_S35_R1_001.fastq.gz', 'RNPv2/20200304_35_MP7624_S35_R2_001.fastq.gz', 'RNPv2/20200304_36_MP7624_S36_R1_001.fastq.gz', 'RNPv2/20200304_36_MP7624_S36_R2_001.fastq.gz', 'RNPv2/20200304_37_MP7624_S37_R1_001.fastq.gz', 'RNPv2/20200304_37_MP7624_S37_R2_001.fastq.gz', 'RNPv2/20200304_38_MP7624_S38_R1_001.fastq.gz', 'RNPv2/20200304_38_MP7624_S38_R2_001.fastq.gz', 'RNPv2/20200304_39_MP7624_S39_R1_001.fastq.gz', 'RNPv2/20200304_39_MP7624_S39_R2_001.fastq.gz', 'RNPv2/20200304_3_MP7624_S3_R1_001.fastq.gz', 'RNPv2/20200304_3_MP7624_S3_R2_001.fastq.gz', 'RNPv2/20200304_40_MP7624_S40_R1_001.fastq.gz', 'RNPv2/20200304_40_MP7624_S40_R2_001.fastq.gz', 'RNPv2/20200304_41_MP7624_S41_R1_001.fastq.gz', 'RNPv2/20200304_41_MP7624_S41_R2_001.fastq.gz', 'RNPv2/20200304_42_MP7624_S42_R1_001.fastq.gz', 'RNPv2/20200304_42_MP7624_S42_R2_001.fastq.gz', 'RNPv2/20200304_43_MP7624_S43_R1_001.fastq.gz', 'RNPv2/20200304_43_MP7624_S43_R2_001.fastq.gz', 'RNPv2/20200304_44_MP7624_S44_R1_001.fastq.gz', 'RNPv2/20200304_44_MP7624_S44_R2_001.fastq.gz', 'RNPv2/20200304_45_MP7624_S45_R1_001.fastq.gz', 'RNPv2/20200304_45_MP7624_S45_R2_001.fastq.gz', 'RNPv2/20200304_46_MP7624_S46_R1_001.fastq.gz', 'RNPv2/20200304_46_MP7624_S46_R2_001.fastq.gz', 'RNPv2/20200304_47_MP7624_S47_R1_001.fastq.gz', 'RNPv2/20200304_47_MP7624_S47_R2_001.fastq.gz', 
'RNPv2/20200304_48_MP7624_S48_R1_001.fastq.gz', 'RNPv2/20200304_48_MP7624_S48_R2_001.fastq.gz', 'RNPv2/20200304_49_MP7624_S49_R1_001.fastq.gz', 'RNPv2/20200304_49_MP7624_S49_R2_001.fastq.gz', 'RNPv2/20200304_4_MP7624_S4_R1_001.fastq.gz', 'RNPv2/20200304_4_MP7624_S4_R2_001.fastq.gz', 'RNPv2/20200304_50_MP7624_S50_R1_001.fastq.gz', 'RNPv2/20200304_50_MP7624_S50_R2_001.fastq.gz', 'RNPv2/20200304_51_MP7624_S51_R1_001.fastq.gz', 'RNPv2/20200304_51_MP7624_S51_R2_001.fastq.gz', 'RNPv2/20200304_52_MP7624_S52_R1_001.fastq.gz', 'RNPv2/20200304_52_MP7624_S52_R2_001.fastq.gz', 'RNPv2/20200304_53_MP7624_S53_R1_001.fastq.gz', 'RNPv2/20200304_53_MP7624_S53_R2_001.fastq.gz', 'RNPv2/20200304_54_MP7624_S54_R1_001.fastq.gz', 'RNPv2/20200304_54_MP7624_S54_R2_001.fastq.gz', 'RNPv2/20200304_55_MP7624_S55_R1_001.fastq.gz', 'RNPv2/20200304_55_MP7624_S55_R2_001.fastq.gz', 'RNPv2/20200304_56_MP7624_S56_R1_001.fastq.gz', 'RNPv2/20200304_56_MP7624_S56_R2_001.fastq.gz', 'RNPv2/20200304_57_MP7624_S57_R1_001.fastq.gz', 'RNPv2/20200304_57_MP7624_S57_R2_001.fastq.gz', 'RNPv2/20200304_58_MP7624_S58_R1_001.fastq.gz', 'RNPv2/20200304_58_MP7624_S58_R2_001.fastq.gz', 'RNPv2/20200304_59_MP7624_S59_R1_001.fastq.gz', 'RNPv2/20200304_59_MP7624_S59_R2_001.fastq.gz', 'RNPv2/20200304_5_MP7624_S5_R1_001.fastq.gz', 'RNPv2/20200304_5_MP7624_S5_R2_001.fastq.gz', 'RNPv2/20200304_60_MP7624_S60_R1_001.fastq.gz', 'RNPv2/20200304_60_MP7624_S60_R2_001.fastq.gz', 'RNPv2/20200304_61_MP7624_S61_R1_001.fastq.gz', 'RNPv2/20200304_61_MP7624_S61_R2_001.fastq.gz', 'RNPv2/20200304_62_MP7624_S62_R1_001.fastq.gz', 'RNPv2/20200304_62_MP7624_S62_R2_001.fastq.gz', 'RNPv2/20200304_63_MP7624_S63_R1_001.fastq.gz', 'RNPv2/20200304_63_MP7624_S63_R2_001.fastq.gz', 'RNPv2/20200304_64_MP7624_S64_R1_001.fastq.gz', 'RNPv2/20200304_64_MP7624_S64_R2_001.fastq.gz', 'RNPv2/20200304_65_MP7624_S65_R1_001.fastq.gz', 'RNPv2/20200304_65_MP7624_S65_R2_001.fastq.gz', 'RNPv2/20200304_66_MP7624_S66_R1_001.fastq.gz', 
'RNPv2/20200304_66_MP7624_S66_R2_001.fastq.gz', 'RNPv2/20200304_67_MP7624_S67_R1_001.fastq.gz', 'RNPv2/20200304_67_MP7624_S67_R2_001.fastq.gz', 'RNPv2/20200304_68_MP7624_S68_R1_001.fastq.gz', 'RNPv2/20200304_68_MP7624_S68_R2_001.fastq.gz', 'RNPv2/20200304_69_MP7624_S69_R1_001.fastq.gz', 'RNPv2/20200304_69_MP7624_S69_R2_001.fastq.gz', 'RNPv2/20200304_6_MP7624_S6_R1_001.fastq.gz', 'RNPv2/20200304_6_MP7624_S6_R2_001.fastq.gz', 'RNPv2/20200304_7_MP7624_S7_R1_001.fastq.gz', 'RNPv2/20200304_7_MP7624_S7_R2_001.fastq.gz', 'RNPv2/20200304_8_MP7624_S8_R1_001.fastq.gz', 'RNPv2/20200304_8_MP7624_S8_R2_001.fastq.gz', 'RNPv2/20200304_9_MP7624_S9_R1_001.fastq.gz', 'RNPv2/20200304_9_MP7624_S9_R2_001.fastq.gz']
> /home/jeremie/JKBio/TerraFunction.py(227)uploadFromFolder()
226 ipdb.set_trace()
--> 227 df = pd.DataFrame(data)
228 print(df)
ipdb> c
sample_id fastq1 \
0 20200304_10 gs://amlproject/RNPv2/20200304_10_MP7624_S10_R...
1 20200304_11 gs://amlproject/RNPv2/20200304_11_MP7624_S11_R...
2 20200304_12 gs://amlproject/RNPv2/20200304_12_MP7624_S12_R...
3 20200304_13 gs://amlproject/RNPv2/20200304_13_MP7624_S13_R...
4 20200304_14 gs://amlproject/RNPv2/20200304_14_MP7624_S14_R...
.. ... ...
64 20200304_69 gs://amlproject/RNPv2/20200304_69_MP7624_S69_R...
65 20200304_6 gs://amlproject/RNPv2/20200304_6_MP7624_S6_R1_...
66 20200304_7 gs://amlproject/RNPv2/20200304_7_MP7624_S7_R1_...
67 20200304_8 gs://amlproject/RNPv2/20200304_8_MP7624_S8_R1_...
68 20200304_9 gs://amlproject/RNPv2/20200304_9_MP7624_S9_R1_...
fastq2
0 gs://amlproject/RNPv2/20200304_10_MP7624_S10_R...
1 gs://amlproject/RNPv2/20200304_11_MP7624_S11_R...
2 gs://amlproject/RNPv2/20200304_12_MP7624_S12_R...
3 gs://amlproject/RNPv2/20200304_13_MP7624_S13_R...
4 gs://amlproject/RNPv2/20200304_14_MP7624_S14_R...
.. ...
64 gs://amlproject/RNPv2/20200304_69_MP7624_S69_R...
65 gs://amlproject/RNPv2/20200304_6_MP7624_S6_R2_...
66 gs://amlproject/RNPv2/20200304_7_MP7624_S7_R2_...
67 gs://amlproject/RNPv2/20200304_8_MP7624_S8_R2_...
68 gs://amlproject/RNPv2/20200304_9_MP7624_S9_R2_...
[69 rows x 3 columns]
Successfully imported 69 participants.
Successfully imported 69 samples.
Successfully imported 1 sample sets:
* MAX_AML_RNPv2 (69 samples)
# Open the workspace, submit the STAR configuration for the samples of the
# sample set, then wait for that submission.
wm = dm.WorkspaceManager('broad-firecloud-ccle/hg38_RNAseq')
submission_id = wm.create_submission(
    "star_v1-0_BETA_cfg",
    sampleset,
    'sample_set',
    expression='this.samples'
)
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission 2ad41571-b46e-4c3b-be51-44e800717d2a.
# Submit the RSEM configuration for the samples of the sample set and wait
# for it. In the recorded run the wait raised
# "RuntimeError: 73 failed submission".
submission_id = wm.create_submission(
    "rsem_v1-0_BETA_cfg",
    sampleset,
    'sample_set',
    expression='this.samples'
)
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission cfd65243-2093-4007-9b21-c5b09c9fc875. 1tatus is: Failed for 0 jobs in submission 0. 2 mn elapsed. 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 27 28 29 3 30 31 32 33 34 35 36 37 38 39 4 40 41 42 43 44 45 46 47 48 49 5 50 51 52 53 54 55 56 57 58 59 6 60 61 62 63 64 65 66 67 68 69 7 70 71 72 73 8 9 0.0 of jobs Succeeded in submission 0.
----------------------------------------------- RuntimeError Traceback (most recent call last) <ipython-input-4-50c8187cd693> in <module> 1 submission_id = wm.create_submission("rsem_v1-0_BETA_cfg", 2 sampleset,'sample_set',expression='this.samples') ----> 3 terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id) ~/JKBio/TerraFunction.py in waitForSubmission(workspace, submissions, raise_errors) 93 print(str(done / (done + failed)) + " of jobs Succeeded in submission " + str(scount) + ".") 94 if len(failed_submission) > 0 and raise_errors: ---> 95 raise RuntimeError(str(len(failed_submission)) + " failed submission") 96 return failed_submission 97 # print and return well formated data RuntimeError: 73 failed submission
# Submit the RSEM aggregation configuration on the sample set itself (no
# per-sample expression this time) and wait for it; the recorded run
# returned an empty list.
submission_id = wm.create_submission(
    "rsem_aggregate_results_v1-0_BETA_cfg", sampleset
)
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission 9be600dc-4db0-4af1-b607-503800cc45fc. 1.0 of jobs Succeeded in submission 0.sion 0. 210 mn elapsed.
[]
# Fetch the row of attributes attached to our sample set and keep the
# location of the aggregated gene-level expected counts.
sample_sets = wm.get_sample_sets()
results = sample_sets.loc[sampleset]
rsem_genes_expected_count = results['rsem_genes_expected_count']
# Last expression of the cell: displays the whole row in the notebook.
results
samples [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1, 20... rsem_transcripts_isopct gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba... rsem_transcripts_tpm gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba... rsem_transcripts_expected_count gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba... rsem_genes_tpm gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba... rsem_genes_expected_count gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba... Name: RNPv3, dtype: object
mkdir ../../data/RNPv3
# Download the aggregated RSEM gene expected-count file into ../../data/RNPv3/.
! gsutil cp $rsem_genes_expected_count ../../data/RNPv3/
Copying gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcbabe2da/9be600dc-4db0-4af1-b607-503800cc45fc/rsem_aggregate_results_workflow/abca308c-59a2-4ad5-8c87-9e4bdf407411/call-rsem_aggregate_results/RNPv3.rsem_genes_expected_count.txt.gz... / [1 files][ 4.6 MiB/ 4.6 MiB] Operation completed over 1 objects/4.6 MiB.
# Local path of the downloaded count file: the download folder followed by
# the last component of the bucket URL.
file = os.path.join(
    '../../data/RNPv3/',
    rsem_genes_expected_count.split('/')[-1]
)
# Display the resulting path.
file
'../../data/RNPv3/RNPv3.rsem_genes_expected_count.txt.gz'
# Decompress the downloaded count file.
! gunzip $file
dd
# Build the BWA index (bwtsw algorithm) of the ERCC92 FASTA.
! bwa index -a bwtsw ../data/ERCC92/ERCC92.fa
[bwa_index] Pack FASTA... 0.00 sec [bwa_index] Construct BWT for the packed sequence... [BWTIncCreate] textLength=165512, availableWord=210772 [bwt_gen] Finished constructing BWT in 5 iterations. [bwa_index] 0.02 seconds elapse. [bwa_index] Update BWT... 0.00 sec [bwa_index] Pack forward-only FASTA... 0.00 sec [bwa_index] Construct SA from BWT and Occ... 0.01 sec [main] Version: 0.7.5-r404 [main] CMD: bwa index -a bwtsw ../data/ERCC92/ERCC92.fa [main] Real time: 0.162 sec; CPU: 0.032 sec
# Index the same ERCC92 FASTA with samtools faidx.
! samtools faidx ../data/ERCC92/ERCC92.fa
from JKBio import Helper as h
# Called without arguments: the recorded output only shows the tool's start-up
# checks followed by its usage message.
! ../../TrimGalore-0.6.5/trim_galore
Multicore support not enabled. Proceeding with single-core trimming. Path to Cutadapt set as: 'cutadapt' (default) Cutadapt seems to be working fine (tested command 'cutadapt --version') Cutadapt version: 2.8 single-core operation. No quality encoding type selected. Assuming that the data provided uses Sanger encoded Phred scores (default) Please provide the filename(s) of one or more FastQ file(s) to launch Trim Galore! USAGE: 'trim_galore [options] <filename(s)>' or 'trim_galore --help' for more options
ls -alh res
total 138M drwxr-xr-x 2 jeremie jeremie 4.0K Mar 13 18:48 ./ drwxr-xr-x 5 jeremie jeremie 4.0K Mar 13 18:48 ../ -rw-r--r-- 1 jeremie jeremie 764 Mar 13 18:48 20200304_10_MP7624_S10_R1_001.fastq.gz_trimming_report.txt -rw-r--r-- 1 jeremie jeremie 15M Mar 13 18:48 20200304_10_MP7624_S10_R1_001_trimmed.fq.gz -rw-r--r-- 1 jeremie jeremie 764 Mar 13 18:48 20200304_11_MP7624_S11_R1_001.fastq.gz_trimming_report.txt -rw-r--r-- 1 jeremie jeremie 14M Mar 13 18:48 20200304_11_MP7624_S11_R1_001_trimmed.fq.gz -rw-r--r-- 1 jeremie jeremie 764 Mar 13 18:48 20200304_12_MP7624_S12_R1_001.fastq.gz_trimming_report.txt -rw-r--r-- 1 jeremie jeremie 15M Mar 13 18:48 20200304_12_MP7624_S12_R1_001_trimmed.fq.gz -rw-r--r-- 1 jeremie jeremie 764 Mar 13 18:48 20200304_13_MP7624_S13_R1_001.fastq.gz_trimming_report.txt -rw-r--r-- 1 jeremie jeremie 14M Mar 13 18:48 20200304_13_MP7624_S13_R1_001_trimmed.fq.gz -rw-r--r-- 1 jeremie jeremie 764 Mar 13 18:48 20200304_14_MP7624_S14_R1_001.fastq.gz_trimming_report.txt -rw-r--r-- 1 jeremie jeremie 14M Mar 13 18:48 20200304_14_MP7624_S14_R1_001_trimmed.fq.gz -rw-r--r-- 1 jeremie jeremie 763 Mar 13 18:48 20200304_15_MP7624_S15_R1_001.fastq.gz_trimming_report.txt -rw-r--r-- 1 jeremie jeremie 14M Mar 13 18:48 20200304_15_MP7624_S15_R1_001_trimmed.fq.gz -rw-r--r-- 1 jeremie jeremie 764 Mar 13 18:48 20200304_16_MP7624_S16_R1_001.fastq.gz_trimming_report.txt -rw-r--r-- 1 jeremie jeremie 14M Mar 13 18:48 20200304_16_MP7624_S16_R1_001_trimmed.fq.gz -rw-r--r-- 1 jeremie jeremie 764 Mar 13 18:48 20200304_17_MP7624_S17_R1_001.fastq.gz_trimming_report.txt -rw-r--r-- 1 jeremie jeremie 14M Mar 13 18:48 20200304_17_MP7624_S17_R1_001_trimmed.fq.gz -rw-r--r-- 1 jeremie jeremie 764 Mar 13 18:48 20200304_18_MP7624_S18_R1_001.fastq.gz_trimming_report.txt -rw-r--r-- 1 jeremie jeremie 14M Mar 13 18:48 20200304_18_MP7624_S18_R1_001_trimmed.fq.gz -rw-r--r-- 1 jeremie jeremie 764 Mar 13 18:48 20200304_19_MP7624_S19_R1_001.fastq.gz_trimming_report.txt -rw-r--r-- 1 jeremie 
jeremie 15M Mar 13 18:48 20200304_19_MP7624_S19_R1_001_trimmed.fq.gz
# Capture the paths of the .sam files found in res/ (the variable keeps the
# name `fastqs` although it holds .sam paths here).
fastqs = !ls res/*.sam
[autoreload of JKBio.Helper failed: Traceback (most recent call last):
File "/home/jeremie/.local/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 245, in check
superreload(m, reload, self.old_objects)
File "/home/jeremie/.local/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
module = reload(module)
File "/usr/lib/python3.7/imp.py", line 314, in reload
return importlib.reload(module)
File "/usr/lib/python3.7/importlib/__init__.py", line 169, in reload
_bootstrap._exec(spec, module)
File "<frozen importlib._bootstrap>", line 630, in _exec
File "<frozen importlib._bootstrap_external>", line 724, in exec_module
File "<frozen importlib._bootstrap_external>", line 860, in get_code
File "<frozen importlib._bootstrap_external>", line 791, in source_to_code
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "../../JKBio/Helper.py", line 676
file[1] + ' | ' pathtosam + ' sort - -o ' + results + file.split('.')[0] + '.sorted.bam'
^
SyntaxError: invalid syntax
]
# One `samtools sort` command per SAM file (8 threads each), writing
# <path up to its first dot>.sorted.bam, plus one `rm` command per SAM file
# passed through `add`.
# NOTE(review): presumably `add` commands run once the matching sort is
# done -- confirm against h.parrun.
h.parrun(
    ['samtools sort {0} -@ 8 -o {1}.sorted.bam'.format(sam, sam.split('.')[0])
     for sam in fastqs],
    cores=1,
    add=['rm {0}'.format(sam) for sam in fastqs]
)
fastqs = !ls res/*.bam
fastqs = [v for i ,v in enumerate(fastqs) if i <30]
fastqs
['res/20200304_10_MP7624_S10_R1_001_val_1.sorted.bam', 'res/20200304_11_MP7624_S11_R1_001_val_1.sorted.bam', 'res/20200304_12_MP7624_S12_R1_001_val_1.sorted.bam', 'res/20200304_13_MP7624_S13_R1_001_val_1.sorted.bam', 'res/20200304_14_MP7624_S14_R1_001_val_1.sorted.bam', 'res/20200304_15_MP7624_S15_R1_001_val_1.sorted.bam', 'res/20200304_16_MP7624_S16_R1_001_val_1.sorted.bam', 'res/20200304_17_MP7624_S17_R1_001_val_1.sorted.bam', 'res/20200304_18_MP7624_S18_R1_001_val_1.sorted.bam', 'res/20200304_19_MP7624_S19_R1_001_val_1.sorted.bam', 'res/20200304_1_MP7624_S1_R1_001_val_1.sorted.bam', 'res/20200304_20_MP7624_S20_R1_001_val_1.sorted.bam', 'res/20200304_21_MP7624_S21_R1_001_val_1.sorted.bam', 'res/20200304_22_MP7624_S22_R1_001_val_1.sorted.bam', 'res/20200304_23_MP7624_S23_R1_001_val_1.sorted.bam', 'res/20200304_24_MP7624_S24_R1_001_val_1.sorted.bam', 'res/20200304_25_MP7624_S25_R1_001_val_1.sorted.bam', 'res/20200304_26_MP7624_S26_R1_001_val_1.sorted.bam', 'res/20200304_27_MP7624_S27_R1_001_val_1.sorted.bam', 'res/20200304_28_MP7624_S28_R1_001_val_1.sorted.bam', 'res/20200304_29_MP7624_S29_R1_001_val_1.sorted.bam', 'res/20200304_2_MP7624_S2_R1_001_val_1.sorted.bam', 'res/20200304_30_MP7624_S30_R1_001_val_1.sorted.bam', 'res/20200304_31_MP7624_S31_R1_001_val_1.sorted.bam', 'res/20200304_32_MP7624_S32_R1_001_val_1.sorted.bam', 'res/20200304_33_MP7624_S33_R1_001_val_1.sorted.bam', 'res/20200304_34_MP7624_S34_R1_001_val_1.sorted.bam', 'res/20200304_35_MP7624_S35_R1_001_val_1.sorted.bam', 'res/20200304_36_MP7624_S36_R1_001_val_1.sorted.bam', 'res/20200304_37_MP7624_S37_R1_001_val_1.sorted.bam']
# Run the spike-in control scaling against the ERCC92 reference on the
# selected BAM files. Trimming and mapping are switched off, filtering is on.
# The recorded output is a pair of dicts keyed by sample name: the first holds
# floats no greater than 1.0, the second holds integers.
# NOTE(review): presumably scaling factors and read counts -- confirm against
# h.getSpikeInControlScales.
h.getSpikeInControlScales(
    '../data/ERCC92/ERCC92.fa',
    fastq=list(fastqs),
    mapper='bwa',
    pairedEnd=True,
    cores=10,
    pathtosam='samtools',
    pathtotrim_galore='../../TrimGalore-0.6.5/trim_galore',
    pathtobwa='bwa',
    totrim=False,
    tomap=False,
    tofilter=True,
    results='res/',
    toremove=True
)
if paired_end, need to be name_*1, name_*2 your files need to be all in the same folder ['20200304_10_MP7624_S10_R1_001_val_1.sorted.bam', '20200304_11_MP7624_S11_R1_001_val_1.sorted.bam', '20200304_12_MP7624_S12_R1_001_val_1.sorted.bam', '20200304_13_MP7624_S13_R1_001_val_1.sorted.bam', '20200304_14_MP7624_S14_R1_001_val_1.sorted.bam', '20200304_15_MP7624_S15_R1_001_val_1.sorted.bam', '20200304_16_MP7624_S16_R1_001_val_1.sorted.bam', '20200304_17_MP7624_S17_R1_001_val_1.sorted.bam', '20200304_18_MP7624_S18_R1_001_val_1.sorted.bam', '20200304_19_MP7624_S19_R1_001_val_1.sorted.bam', '20200304_1_MP7624_S1_R1_001_val_1.sorted.bam', '20200304_20_MP7624_S20_R1_001_val_1.sorted.bam', '20200304_21_MP7624_S21_R1_001_val_1.sorted.bam', '20200304_22_MP7624_S22_R1_001_val_1.sorted.bam', '20200304_23_MP7624_S23_R1_001_val_1.sorted.bam', '20200304_24_MP7624_S24_R1_001_val_1.sorted.bam', '20200304_25_MP7624_S25_R1_001_val_1.sorted.bam', '20200304_26_MP7624_S26_R1_001_val_1.sorted.bam', '20200304_27_MP7624_S27_R1_001_val_1.sorted.bam', '20200304_28_MP7624_S28_R1_001_val_1.sorted.bam', '20200304_29_MP7624_S29_R1_001_val_1.sorted.bam', '20200304_2_MP7624_S2_R1_001_val_1.sorted.bam', '20200304_30_MP7624_S30_R1_001_val_1.sorted.bam', '20200304_31_MP7624_S31_R1_001_val_1.sorted.bam', '20200304_32_MP7624_S32_R1_001_val_1.sorted.bam', '20200304_33_MP7624_S33_R1_001_val_1.sorted.bam', '20200304_34_MP7624_S34_R1_001_val_1.sorted.bam', '20200304_35_MP7624_S35_R1_001_val_1.sorted.bam', '20200304_36_MP7624_S36_R1_001_val_1.sorted.bam', '20200304_37_MP7624_S37_R1_001_val_1.sorted.bam'] you need to have your files in the res/ folder filtering counting
({'20200304_10_MP7624_S10_R1_001_val_1': 0.16898942258544017,
'20200304_11_MP7624_S11_R1_001_val_1': 0.2137747794550614,
'20200304_12_MP7624_S12_R1_001_val_1': 0.1744152888937967,
'20200304_13_MP7624_S13_R1_001_val_1': 0.6105037644754658,
'20200304_14_MP7624_S14_R1_001_val_1': 1.0,
'20200304_15_MP7624_S15_R1_001_val_1': 0.5340835638285261,
'20200304_16_MP7624_S16_R1_001_val_1': 0.35631786259352977,
'20200304_17_MP7624_S17_R1_001_val_1': 0.6156760495423441,
'20200304_18_MP7624_S18_R1_001_val_1': 0.727821906530712,
'20200304_19_MP7624_S19_R1_001_val_1': 0.07483668099259128,
'20200304_1_MP7624_S1_R1_001_val_1': 0.47783743300316456,
'20200304_20_MP7624_S20_R1_001_val_1': 0.3124741021318431,
'20200304_21_MP7624_S21_R1_001_val_1': 0.31825232376633517,
'20200304_22_MP7624_S22_R1_001_val_1': 0.9675827613577999,
'20200304_23_MP7624_S23_R1_001_val_1': 0.5763662983450001,
'20200304_24_MP7624_S24_R1_001_val_1': 0.6390725535862314,
'20200304_25_MP7624_S25_R1_001_val_1': 0.8755348849786047,
'20200304_26_MP7624_S26_R1_001_val_1': 0.7450538798837408,
'20200304_27_MP7624_S27_R1_001_val_1': 0.7762981432726411,
'20200304_28_MP7624_S28_R1_001_val_1': 0.7081911099506929,
'20200304_29_MP7624_S29_R1_001_val_1': 0.8619769958886765,
'20200304_2_MP7624_S2_R1_001_val_1': 0.1737388317027055,
'20200304_30_MP7624_S30_R1_001_val_1': 0.809013539047738,
'20200304_31_MP7624_S31_R1_001_val_1': 0.3852663157088453,
'20200304_32_MP7624_S32_R1_001_val_1': 0.28762189014952155,
'20200304_33_MP7624_S33_R1_001_val_1': 0.32142600106714314,
'20200304_34_MP7624_S34_R1_001_val_1': 0.7614738906061448,
'20200304_35_MP7624_S35_R1_001_val_1': 0.6038299707448979,
'20200304_36_MP7624_S36_R1_001_val_1': 0.46878297111481504,
'20200304_37_MP7624_S37_R1_001_val_1': 0.7230628158623067},
{'20200304_10_MP7624_S10_R1_001_val_1': 3065683,
'20200304_11_MP7624_S11_R1_001_val_1': 2423429,
'20200304_12_MP7624_S12_R1_001_val_1': 2970313,
'20200304_13_MP7624_S13_R1_001_val_1': 848591,
'20200304_14_MP7624_S14_R1_001_val_1': 518068,
'20200304_15_MP7624_S15_R1_001_val_1': 970013,
'20200304_16_MP7624_S16_R1_001_val_1': 1453949,
'20200304_17_MP7624_S17_R1_001_val_1': 841462,
'20200304_18_MP7624_S18_R1_001_val_1': 711806,
'20200304_19_MP7624_S19_R1_001_val_1': 6922648,
'20200304_1_MP7624_S1_R1_001_val_1': 1084193,
'20200304_20_MP7624_S20_R1_001_val_1': 1657955,
'20200304_21_MP7624_S21_R1_001_val_1': 1627853,
'20200304_22_MP7624_S22_R1_001_val_1': 535425,
'20200304_23_MP7624_S23_R1_001_val_1': 898852,
'20200304_24_MP7624_S24_R1_001_val_1': 810656,
'20200304_25_MP7624_S25_R1_001_val_1': 591716,
'20200304_26_MP7624_S26_R1_001_val_1': 695343,
'20200304_27_MP7624_S27_R1_001_val_1': 667357,
'20200304_28_MP7624_S28_R1_001_val_1': 731537,
'20200304_29_MP7624_S29_R1_001_val_1': 601023,
'20200304_2_MP7624_S2_R1_001_val_1': 2981878,
'20200304_30_MP7624_S30_R1_001_val_1': 640370,
'20200304_31_MP7624_S31_R1_001_val_1': 1344701,
'20200304_32_MP7624_S32_R1_001_val_1': 1801212,
'20200304_33_MP7624_S33_R1_001_val_1': 1611780,
'20200304_34_MP7624_S34_R1_001_val_1': 680349,
'20200304_35_MP7624_S35_R1_001_val_1': 857970,
'20200304_36_MP7624_S36_R1_001_val_1': 1105134,
'20200304_37_MP7624_S37_R1_001_val_1': 716491})
# Walk the FASTQ list two entries at a time and show the first file of each
# group (presumably the R1 mate of a paired-end sample; verify h.grouped).
for f in h.grouped(fastqs, 2):
    first_of_pair = f[0]
    print(first_of_pair)
res/20200304_38_MP7624_S38_R1_001_val_1.fq.gz res/20200304_39_MP7624_S39_R1_001_val_1.fq.gz res/20200304_3_MP7624_S3_R1_001_val_1.fq.gz res/20200304_40_MP7624_S40_R1_001_val_1.fq.gz res/20200304_41_MP7624_S41_R1_001_val_1.fq.gz res/20200304_42_MP7624_S42_R1_001_val_1.fq.gz res/20200304_43_MP7624_S43_R1_001_val_1.fq.gz res/20200304_44_MP7624_S44_R1_001_val_1.fq.gz res/20200304_45_MP7624_S45_R1_001_val_1.fq.gz res/20200304_46_MP7624_S46_R1_001_val_1.fq.gz res/20200304_47_MP7624_S47_R1_001_val_1.fq.gz res/20200304_48_MP7624_S48_R1_001_val_1.fq.gz res/20200304_49_MP7624_S49_R1_001_val_1.fq.gz res/20200304_4_MP7624_S4_R1_001_val_1.fq.gz res/20200304_50_MP7624_S50_R1_001_val_1.fq.gz res/20200304_51_MP7624_S51_R1_001_val_1.fq.gz res/20200304_52_MP7624_S52_R1_001_val_1.fq.gz res/20200304_53_MP7624_S53_R1_001_val_1.fq.gz res/20200304_54_MP7624_S54_R1_001_val_1.fq.gz res/20200304_55_MP7624_S55_R1_001_val_1.fq.gz res/20200304_56_MP7624_S56_R1_001_val_1.fq.gz res/20200304_57_MP7624_S57_R1_001_val_1.fq.gz res/20200304_58_MP7624_S58_R1_001_val_1.fq.gz res/20200304_59_MP7624_S59_R1_001_val_1.fq.gz res/20200304_5_MP7624_S5_R1_001_val_1.fq.gz res/20200304_60_MP7624_S60_R1_001_val_1.fq.gz res/20200304_61_MP7624_S61_R1_001_val_1.fq.gz res/20200304_62_MP7624_S62_R1_001_val_1.fq.gz res/20200304_63_MP7624_S63_R1_001_val_1.fq.gz res/20200304_64_MP7624_S64_R1_001_val_1.fq.gz res/20200304_65_MP7624_S65_R1_001_val_1.fq.gz res/20200304_66_MP7624_S66_R1_001_val_1.fq.gz res/20200304_67_MP7624_S67_R1_001_val_1.fq.gz res/20200304_68_MP7624_S68_R1_001_val_1.fq.gz res/20200304_69_MP7624_S69_R1_001_val_1.fq.gz res/20200304_6_MP7624_S6_R1_001_val_1.fq.gz res/20200304_7_MP7624_S7_R1_001_val_1.fq.gz res/20200304_8_MP7624_S8_R1_001_val_1.fq.gz res/20200304_9_MP7624_S9_R1_001_val_1.fq.gz
# Display the path currently held in `file` (defined outside this section).
file
'../../data/RNPv3/RNPv3.rsem_genes_expected_count.txt.gz'
# Load the RSEM gene-level expected counts (tab separated).
# NOTE(review): the table read from `file[:-3]` is immediately replaced by the
# RNPv2 table on the next line, so only the RNPv2 counts are used below —
# confirm which dataset is intended.
rsem_genes_expected_count = pd.read_csv(file[:-3], sep='\t')
rsem_genes_expected_count = pd.read_csv("../../data/RNPv2/MAX_AML_RNPv2.rsem_genes_expected_count.txt", sep='\t')
# Drop the transcript list column. The axis is given by keyword because the
# positional form `drop(label, 1)` is no longer accepted by pandas >= 2.0.
data = rsem_genes_expected_count.drop(columns="transcript_id(s)")
# Replace the gene identifiers with the first element returned by
# h.convertGenes (presumably gene symbols; verify against the helper).
data["gene_id"] = h.convertGenes(data['gene_id'])[0]
you need access to taiga for this (https://pypi.org/project/taigapy/) 20702 could not be parsed... we don't have all genes already
# Use the gene identifier column as the row index, then display the table.
data = data.set_index(keys='gene_id')
data
| 1 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | ... | 67 | 68 | 69 | 7 | 70 | 71 | 72 | 73 | 8 | 9 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| gene_id | |||||||||||||||||||||
| TSPAN6 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| TNMD | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| DPM1 | 1619.00 | 2465.00 | 1701.00 | 1535.00 | 1863.00 | 2093.00 | 2027.00 | 2202.00 | 2148.00 | 2235.00 | ... | 1620.00 | 1840.00 | 1729.00 | 1983.00 | 1926.0 | 1846.00 | 1915.00 | 2633.00 | 2451.00 | 2378.00 |
| SCYL3 | 464.57 | 846.12 | 672.69 | 603.75 | 577.41 | 617.97 | 601.43 | 545.49 | 575.14 | 536.97 | ... | 430.78 | 460.04 | 437.36 | 542.42 | 572.5 | 507.48 | 580.49 | 713.56 | 670.02 | 576.38 |
| C1orf112 | 780.43 | 1031.90 | 755.31 | 676.25 | 1232.70 | 1209.00 | 1309.60 | 1370.50 | 1245.90 | 1257.10 | ... | 949.22 | 1277.00 | 1032.60 | 1163.60 | 783.5 | 1088.50 | 1184.50 | 1572.40 | 1481.00 | 1332.90 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| ERCC-00164 | 3.00 | 5.00 | 8.00 | 2.00 | 2.00 | 1.00 | 2.00 | 1.00 | 3.00 | 3.00 | ... | 1.00 | 1.00 | 5.00 | 1.00 | 6.0 | 3.00 | 3.00 | 4.00 | 2.00 | 4.00 |
| ERCC-00165 | 215.00 | 594.00 | 424.00 | 509.00 | 136.00 | 88.00 | 165.00 | 258.00 | 161.00 | 163.00 | ... | 93.00 | 139.00 | 87.00 | 127.00 | 628.0 | 207.00 | 151.00 | 241.00 | 187.00 | 176.00 |
| ERCC-00168 | 3.00 | 12.00 | 9.00 | 8.00 | 0.00 | 8.00 | 0.00 | 5.00 | 5.00 | 1.00 | ... | 3.00 | 4.00 | 1.00 | 3.00 | 8.0 | 5.00 | 4.00 | 7.00 | 8.00 | 3.00 |
| ERCC-00170 | 66.00 | 205.00 | 133.00 | 211.00 | 57.00 | 40.00 | 73.00 | 94.00 | 42.00 | 40.00 | ... | 41.00 | 56.00 | 33.00 | 50.00 | 141.0 | 72.00 | 92.00 | 110.00 | 89.00 | 88.00 |
| ERCC-00171 | 13554.00 | 40900.00 | 29090.00 | 33242.00 | 10039.00 | 6399.00 | 10836.00 | 15684.00 | 9526.00 | 8893.00 | ... | 7058.00 | 7576.00 | 5882.00 | 8381.00 | 47913.0 | 12046.00 | 10447.00 | 17316.00 | 10492.00 | 12389.00 |
58813 rows × 73 columns
# Sample layout: each RNP target with its replicate numbers, in sample order.
# Sample ids run consecutively from 1 and sample "n" carries the label "mr<119+n>".
replicate_layout = [
    ("IRF2BP2", (4, 5, 6)),
    ("IRF8", (4, 5, 6)),
    ("MEF2D", (4, 5, 6)),
    ("MYC", (4, 5, 6)),
    ("RUNX1", (4, 5, 6)),
    ("RUNX2", (4, 5, 6)),
    ("SPI1", (4, 5, 6)),
    ("ZMYND8", (4, 5, 6)),
    ("LMO2", (4, 5, 6)),
    ("LYL1", (4, 5, 6)),
    ("MAX", (4, 5, 6)),
    ("ZEB2", (4, 5, 6)),
    ("MEF2C", (4, 5, 6)),
    ("MEIS1", (4, 5, 6)),
    ("FLI1", (4, 5, 6)),
    ("ELF2", (4, 5, 6)),
    ("GFI1", (4, 5, 6)),
    ("IKZF1", (4, 5, 6)),
    ("CEBPA", (4, 5, 6)),
    ("MYB", (4, 5, 6)),
    ("MYBL2", (1, 2, 3)),
    ("HOXA9", (4, 5, 6)),
    ("AAVS1", (1, 2, 3)),
    ("SP1", (4, 5, 6, 7)),
]

# Map the numeric sample id (as a string) to its full sample name,
# e.g. "1" -> "mr120-MV411-RNP_IRF2BP2-r4".
rename = {
    str(sample_id): "mr{}-MV411-RNP_{}-r{}".format(119 + sample_id, target, replicate)
    for sample_id, (target, replicate) in enumerate(
        ((t, r) for t, reps in replicate_layout for r in reps), start=1
    )
}
# Inspect the current column labels (numeric sample ids stored as strings).
data.columns
Index(['1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2',
'20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30',
'31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41',
'42', '43', '44', '45', '46', '47', '48', '49', '5', '50', '51', '52',
'53', '54', '55', '56', '57', '58', '59', '6', '60', '61', '62', '63',
'64', '65', '66', '67', '68', '69', '7', '70', '71', '72', '73', '8',
'9'],
dtype='object')
# Swap every numeric sample id for its full sample name; an id missing from
# `rename` raises KeyError. Then display the relabelled table.
data.columns = [rename[sample_id] for sample_id in data.columns]
data
| mr120-MV411-RNP_IRF2BP2-r4 | mr129-MV411-RNP_MYC-r4 | mr130-MV411-RNP_MYC-r5 | mr131-MV411-RNP_MYC-r6 | mr132-MV411-RNP_RUNX1-r4 | mr133-MV411-RNP_RUNX1-r5 | mr134-MV411-RNP_RUNX1-r6 | mr135-MV411-RNP_RUNX2-r4 | mr136-MV411-RNP_RUNX2-r5 | mr137-MV411-RNP_RUNX2-r6 | ... | mr186-MV411-RNP_AAVS1-r1 | mr187-MV411-RNP_AAVS1-r2 | mr188-MV411-RNP_AAVS1-r3 | mr126-MV411-RNP_MEF2D-r4 | mr189-MV411-RNP_SP1-r4 | mr190-MV411-RNP_SP1-r5 | mr191-MV411-RNP_SP1-r6 | mr192-MV411-RNP_SP1-r7 | mr127-MV411-RNP_MEF2D-r5 | mr128-MV411-RNP_MEF2D-r6 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| gene_id | |||||||||||||||||||||
| TSPAN6 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| TNMD | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| DPM1 | 1619.00 | 2465.00 | 1701.00 | 1535.00 | 1863.00 | 2093.00 | 2027.00 | 2202.00 | 2148.00 | 2235.00 | ... | 1620.00 | 1840.00 | 1729.00 | 1983.00 | 1926.0 | 1846.00 | 1915.00 | 2633.00 | 2451.00 | 2378.00 |
| SCYL3 | 464.57 | 846.12 | 672.69 | 603.75 | 577.41 | 617.97 | 601.43 | 545.49 | 575.14 | 536.97 | ... | 430.78 | 460.04 | 437.36 | 542.42 | 572.5 | 507.48 | 580.49 | 713.56 | 670.02 | 576.38 |
| C1orf112 | 780.43 | 1031.90 | 755.31 | 676.25 | 1232.70 | 1209.00 | 1309.60 | 1370.50 | 1245.90 | 1257.10 | ... | 949.22 | 1277.00 | 1032.60 | 1163.60 | 783.5 | 1088.50 | 1184.50 | 1572.40 | 1481.00 | 1332.90 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| ERCC-00164 | 3.00 | 5.00 | 8.00 | 2.00 | 2.00 | 1.00 | 2.00 | 1.00 | 3.00 | 3.00 | ... | 1.00 | 1.00 | 5.00 | 1.00 | 6.0 | 3.00 | 3.00 | 4.00 | 2.00 | 4.00 |
| ERCC-00165 | 215.00 | 594.00 | 424.00 | 509.00 | 136.00 | 88.00 | 165.00 | 258.00 | 161.00 | 163.00 | ... | 93.00 | 139.00 | 87.00 | 127.00 | 628.0 | 207.00 | 151.00 | 241.00 | 187.00 | 176.00 |
| ERCC-00168 | 3.00 | 12.00 | 9.00 | 8.00 | 0.00 | 8.00 | 0.00 | 5.00 | 5.00 | 1.00 | ... | 3.00 | 4.00 | 1.00 | 3.00 | 8.0 | 5.00 | 4.00 | 7.00 | 8.00 | 3.00 |
| ERCC-00170 | 66.00 | 205.00 | 133.00 | 211.00 | 57.00 | 40.00 | 73.00 | 94.00 | 42.00 | 40.00 | ... | 41.00 | 56.00 | 33.00 | 50.00 | 141.0 | 72.00 | 92.00 | 110.00 | 89.00 | 88.00 |
| ERCC-00171 | 13554.00 | 40900.00 | 29090.00 | 33242.00 | 10039.00 | 6399.00 | 10836.00 | 15684.00 | 9526.00 | 8893.00 | ... | 7058.00 | 7576.00 | 5882.00 | 8381.00 | 47913.0 | 12046.00 | 10447.00 | 17316.00 | 10492.00 | 12389.00 |
58813 rows × 73 columns
Filter some more: drop the genes whose counts have zero variance across samples
# Row positions whose values have zero variance across the columns.
row_variance = data.values.var(axis=1)
toremove = np.argwhere(row_variance == 0)
toremove.ravel()
array([ 1, 15, 24, ..., 58714, 58715, 58718])
# Number of zero-variance rows flagged for removal (np.argwhere yields an (n, 1) array).
toremove.shape
(19991, 1)
# Remove the zero-variance rows by POSITION.
# Dropping by index label (as before) also discards every other row that
# shares a gene symbol with a flagged row; the recorded run lost 35 rows more
# than the 19991 flagged (58813 - 19991 = 38822, but 38787 were left).
# A positional boolean mask removes exactly the flagged rows.
keep_row = np.ones(len(data), dtype=bool)
keep_row[toremove.ravel()] = False
data = data[keep_row]
data.shape
(38787, 73)
# Classify each row by its identifier.
is_ensembl = data.index.str.contains('ENSG00')
is_spike = data.index.str.contains('ERCC-')

# ERCC keeps every row that is not a raw ENSG id, i.e. the ERCC spike-ins
# together with all named genes.
ERCC = data[~is_ensembl]
# ensg holds the rows that still carry a raw ENSG id.
ensg = data[is_ensembl & ~is_spike]
# data keeps only the named genes (neither spike-in nor raw ENSG id).
data = data[~is_ensembl & ~is_spike]
Renormalize the data using the ERCC spike-ins
# Row count of ERCC; this table holds every row without a raw ENSG id
# (named genes as well as the ERCC-* spike-ins), not just the spike-ins.
len(ERCC)
26672
# Load the CTF names from the first column of the header-less CSV as a plain list.
ctf_table = pd.read_csv('../data/CTF.csv', header=None)
ctf = ctf_table[0].values.tolist()
ctf
['MYC', 'MYB', 'SPI1', 'RUNX1', 'GSE1', 'IRF2BP2', 'FLI1', 'ELF2', 'ZEB2', 'IKAROS', 'GFI1', 'LMO2', 'CEBPA', 'MEF2D', 'MEF2C', 'IRF8', 'MEIS1', 'RUNX2', 'ETV6', 'LDB1', 'RUNX2', 'SP1', 'ZMYND8']
# Split the row positions of `data` into CTF genes and all other genes.
genenames = data.index
ctfpos, notctfpos = [], []
for position, gene in enumerate(genenames):
    if gene in ctf:
        ctfpos.append(position)
    else:
        notctfpos.append(position)
We find one CTF that is not in the dataset:
# CTF names that have no matching row label in the expression table.
[gene for gene in ctf if gene not in genenames]
['IKAROS']
# Drop the CTF name that has no row in the expression table.
# NOTE(review): IKAROS is presumably an alias of IKZF1, which is one of the RNP
# targets in this experiment; consider renaming it to IKZF1 instead of
# dropping it — confirm. Raises ValueError if this cell is run twice.
ctf.remove('IKAROS')
%%R
# Attach the erccdashboard package in the embedded R session.
library('erccdashboard')
R[write to console]: Loading required package: ggplot2
R[write to console]: Loading required package: gridExtra
R[write to console]:
Attaching package: ‘gridExtra’
R[write to console]: The following object is masked from ‘package:Biobase’:
combine
R[write to console]: The following object is masked from ‘package:BiocGenerics’:
combine
# Cast the counts to integers and expose the row labels as a trailing
# 'Feature' column, then display the table.
# NOTE(review): astype(int) truncates fractional expected counts
# (e.g. 464.57 -> 464) rather than rounding — confirm this is intended.
ERCC = ERCC.astype(int).assign(Feature=lambda table: table.index)
ERCC
| mr120-MV411-RNP_IRF2BP2-r4 | mr129-MV411-RNP_MYC-r4 | mr130-MV411-RNP_MYC-r5 | mr131-MV411-RNP_MYC-r6 | mr132-MV411-RNP_RUNX1-r4 | mr133-MV411-RNP_RUNX1-r5 | mr134-MV411-RNP_RUNX1-r6 | mr135-MV411-RNP_RUNX2-r4 | mr136-MV411-RNP_RUNX2-r5 | mr137-MV411-RNP_RUNX2-r6 | ... | mr187-MV411-RNP_AAVS1-r2 | mr188-MV411-RNP_AAVS1-r3 | mr126-MV411-RNP_MEF2D-r4 | mr189-MV411-RNP_SP1-r4 | mr190-MV411-RNP_SP1-r5 | mr191-MV411-RNP_SP1-r6 | mr192-MV411-RNP_SP1-r7 | mr127-MV411-RNP_MEF2D-r5 | mr128-MV411-RNP_MEF2D-r6 | Feature | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| gene_id | |||||||||||||||||||||
| TSPAN6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | TSPAN6 |
| DPM1 | 1619 | 2465 | 1701 | 1535 | 1863 | 2093 | 2027 | 2202 | 2148 | 2235 | ... | 1840 | 1729 | 1983 | 1926 | 1846 | 1915 | 2633 | 2451 | 2378 | DPM1 |
| SCYL3 | 464 | 846 | 672 | 603 | 577 | 617 | 601 | 545 | 575 | 536 | ... | 460 | 437 | 542 | 572 | 507 | 580 | 713 | 670 | 576 | SCYL3 |
| C1orf112 | 780 | 1031 | 755 | 676 | 1232 | 1209 | 1309 | 1370 | 1245 | 1257 | ... | 1277 | 1032 | 1163 | 783 | 1088 | 1184 | 1572 | 1481 | 1332 | C1orf112 |
| FGR | 1443 | 8556 | 6387 | 5955 | 2359 | 2615 | 2258 | 3340 | 3229 | 3466 | ... | 2401 | 2230 | 3680 | 2016 | 2285 | 2384 | 3106 | 4706 | 4308 | FGR |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| ERCC-00164 | 3 | 5 | 8 | 2 | 2 | 1 | 2 | 1 | 3 | 3 | ... | 1 | 5 | 1 | 6 | 3 | 3 | 4 | 2 | 4 | ERCC-00164 |
| ERCC-00165 | 215 | 594 | 424 | 509 | 136 | 88 | 165 | 258 | 161 | 163 | ... | 139 | 87 | 127 | 628 | 207 | 151 | 241 | 187 | 176 | ERCC-00165 |
| ERCC-00168 | 3 | 12 | 9 | 8 | 0 | 8 | 0 | 5 | 5 | 1 | ... | 4 | 1 | 3 | 8 | 5 | 4 | 7 | 8 | 3 | ERCC-00168 |
| ERCC-00170 | 66 | 205 | 133 | 211 | 57 | 40 | 73 | 94 | 42 | 40 | ... | 56 | 33 | 50 | 141 | 72 | 92 | 110 | 89 | 88 | ERCC-00170 |
| ERCC-00171 | 13554 | 40900 | 29090 | 33242 | 10039 | 6399 | 10836 | 15684 | 9526 | 8893 | ... | 7576 | 5882 | 8381 | 47913 | 12046 | 10447 | 17316 | 10492 | 12389 | ERCC-00171 |
26672 rows × 74 columns
from rpy2.robjects.packages import importr

# Distinct "RNP_<target>" fields of the sample columns (the trailing 'Feature'
# column is skipped); the AAVS1 control is not an experiment of its own.
experiments = list({column.split('-')[2] for column in ERCC.columns[:-1]})
experiments.remove("RNP_AAVS1")

# Python handle on the R erccdashboard package.
erccdashboard = importr('erccdashboard')
#TODO: compute the mass from concentration
###################################################
### code chunk number 3: defineInputData
###################################################
%R datType = "count" # "count" for RNA-Seq data, "array" for microarray data
%R isNorm = False # flag to indicate if input expression measures are already
# normalized, default is FALSE
%R filenameRoot = "RNPv2" # user defined filename prefix for results files
%R sample2Name = "AAAVS1" # name for sample 2 in the experiment
%R erccmix = "RatioPair" # name of ERCC mixture design, "RatioPair" is default
%R erccdilution = 1/100 # dilution factor used for Ambion spike-in mixtures
%R spikeVol = 1 # volume (in microliters) of diluted spike-in mixture added to
# total RNA mass
%R totalRNAmass = 0.500 # mass (in micrograms) of total RNA
%R choseFDR = 0.05 # user defined false discovery rate (FDR), default is 0.05
cols = list(ERCC.columns)
cols.sort()
for val in experiments:
data = {}
e=0
data.update({
'Feature':'Feature'
})
for i in cols:
if val in i:
e+=1
data.update({i: val.split('_')[-1]+'_'+str(e)})
data.update({
'mr186-MV411-RNP_AAVS1-r1': 'AAAVS1_1',
'mr187-MV411-RNP_AAVS1-r2': 'AAAVS1_2',
'mr188-MV411-RNP_AAVS1-r3': 'AAAVS1_3'
})
a = ERCC[list(data.keys())].rename(columns=data)
a.to_csv('../data/ERCC_estimation.csv', index=None)
val = val.split('_')[-1]
torm = 'RNPv2.'+val+'.AAAVS1.All.Pvals.csv'
! rm $torm
%R -i val print(val)
%R print(sample2Name)
%R a <- read.csv('../data/ERCC_estimation.csv')
%R print(head(a))
%R exDat = ''
try:
%R -i val exDat = runDashboard(datType=datType, isNorm = isNorm, exTable=a, filenameRoot=filenameRoot, sample1Name=val, sample2Name=sample2Name, erccmix=erccmix, erccdilution=erccdilution, spikeVol=spikeVol, totalRNAmass=totalRNAmass, choseFDR=choseFDR)
except Warning:
print("failed for "+val)
continue
except:
print('worked for '+val)
%R print(summary(exDat))
%R grid.arrange(exDat$Figures$dynRangePlot)
%R grid.arrange(exDat$Figures$rocPlot)
%R grid.arrange(exDat$Figures$lodrERCCPlot)
%R grid.arrange(exDat$Figures$maPlot)
rm: cannot remove 'RNPv2.SPI1.AAAVS1.All.Pvals.csv': No such file or directory [1] "SPI1" [1] "AAAVS1" Feature SPI1_1 SPI1_2 SPI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 1 0 0 0 2 DPM1 2415 1729 2302 1620 1840 1729 3 SCYL3 798 648 744 430 460 437 4 C1orf112 1054 742 1104 949 1277 1032 5 FGR 2369 1766 2458 2323 2401 2230 6 CFH 44 22 58 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.SPI1.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 17191 transcripts remain for analysis. A total of 11 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00142 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2758.5 2102.5 2723 1622 1888 1696 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 81 Outlier ERCCs for GLM r_m Estimate: ERCC-00123 ERCC-00097 ERCC-00134 ERCC-00147 ERCC-00033 ERCC-00028 ERCC-00085 ERCC-00014 ERCC-00170 ERCC-00144 ERCC-00019 ERCC-00062 ERCC-00095 ERCC-00131 ERCC-00092 ERCC-00116 ERCC-00108 ERCC-00136 ERCC-00004 ERCC-00130 GLM log(r_m) estimate: -1.172822 GLM log(r_m) estimate weighted s.e.: 0.9182462 Number of ERCCs in Mix 1 dyn range: 81 Number of ERCCs in Mix 2 dyn range: 81 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00012 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00017 ERCC-00041 ERCC-00073 ERCC-00081 ERCC-00156 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.922442 7.650882 7.909489 7.391415 7.543273 7.436028 Disp = 0.00583 , BCV = 0.0763 Disp = 0.00582 , BCV = 0.0763 Finished DE testing Finished examining dispersions Threshold P-value 0.09198248 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.598 22 23 1:1.5 0.626 19 23 1:2 0.665 20 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for SPI1
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
rm: cannot remove 'RNPv2.RUNX2.AAAVS1.All.Pvals.csv': No such file or directory [1] "RUNX2" [1] "AAAVS1" Feature RUNX2_1 RUNX2_2 RUNX2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 2202 2148 2235 1620 1840 1729 3 SCYL3 545 575 536 430 460 437 4 C1orf112 1370 1245 1257 949 1277 1032 5 FGR 3340 3229 3466 2323 2401 2230 6 CFH 16 12 14 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.RUNX2.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 17042 transcripts remain for analysis. A total of 20 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2369.75 2268 2240.75 1638.75 1908.5 1710.75 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 72 Outlier ERCCs for GLM r_m Estimate: ERCC-00147 ERCC-00077 ERCC-00154 ERCC-00028 ERCC-00085 ERCC-00160 ERCC-00170 ERCC-00144 ERCC-00059 ERCC-00163 ERCC-00019 ERCC-00062 ERCC-00095 ERCC-00078 ERCC-00071 ERCC-00079 ERCC-00131 ERCC-00165 ERCC-00092 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00111 ERCC-00116 ERCC-00108 ERCC-00043 ERCC-00145 ERCC-00136 ERCC-00003 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: -0.06126088 GLM log(r_m) estimate weighted s.e.: 0.8674952 Number of ERCCs in Mix 1 dyn range: 72 Number of ERCCs in Mix 2 dyn range: 72 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00073 ERCC-00097 ERCC-00134 ERCC-00104 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.77054 7.726654 7.714566 7.401689 7.554073 7.444687 Disp = 0.00341 , BCV = 0.0584 Disp = 0.00341 , BCV = 0.0584 Finished DE testing Finished examining dispersions Threshold P-value 0.00174422 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.591 19 23 1:1.5 0.611 18 23 1:2 0.680 17 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for RUNX2
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
rm: cannot remove 'RNPv2.GFI1.AAAVS1.All.Pvals.csv': No such file or directory [1] "GFI1" [1] "AAAVS1" Feature GFI1_1 GFI1_2 GFI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 3000 984 1798 1620 1840 1729 3 SCYL3 708 258 466 430 460 437 4 C1orf112 1813 586 1037 949 1277 1032 5 FGR 2396 788 1525 2323 2401 2230 6 CFH 42 18 35 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.GFI1.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16711 transcripts remain for analysis. A total of 21 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00134 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 3122 1018 1947 1690.5 1977 1757 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 71 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00058 ERCC-00069 ERCC-00085 ERCC-00143 ERCC-00054 ERCC-00160 ERCC-00170 ERCC-00144 ERCC-00157 ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00035 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00009 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00096 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.2828325 GLM log(r_m) estimate weighted s.e.: 0.852983 Number of ERCCs in Mix 1 dyn range: 71 Number of ERCCs in Mix 2 dyn range: 71 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00040 ERCC-00097 ERCC-00120 ERCC-00137 ERCC-00158 ERCC-00164 ERCC-00168 ERCC-00073 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 8.046229 6.925595 7.574045 7.43278 7.589336 7.471363 Disp = 0.0035 , BCV = 0.0591 Disp = 0.0035 , BCV = 0.0591 Finished DE testing Finished examining dispersions Threshold P-value 0.01869133 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.507 18 23 1:1.5 0.663 18 23 1:2 0.516 18 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for GFI1
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "IRF2BP2" [1] "AAAVS1" Feature IRF2BP2_1 IRF2BP2_2 IRF2BP2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1619 1938 2043 1620 1840 1729 3 SCYL3 464 545 564 430 460 437 4 C1orf112 780 776 908 949 1277 1032 5 FGR 1443 1587 1765 2323 2401 2230 6 CFH 3 5 15 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.IRF2BP2.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16582 transcripts remain for analysis. A total of 13 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00117 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 1614.75 1750.75 2094 1704 1995 1776 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 79 Outlier ERCCs for GLM r_m Estimate: ERCC-00123 ERCC-00097 ERCC-00134 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00039 ERCC-00154 ERCC-00028 ERCC-00085 ERCC-00157 ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00170 ERCC-00144 ERCC-00019 ERCC-00078 ERCC-00079 ERCC-00062 ERCC-00095 ERCC-00165 ERCC-00131 ERCC-00112 ERCC-00092 ERCC-00022 ERCC-00043 ERCC-00116 ERCC-00108 ERCC-00003 ERCC-00136 ERCC-00046 ERCC-00004 ERCC-00002 ERCC-00130 GLM log(r_m) estimate: -1.12242 GLM log(r_m) estimate weighted s.e.: 0.8872992 Number of ERCCs in Mix 1 dyn range: 79 Number of ERCCs in Mix 2 dyn range: 79 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00041 ERCC-00138 ERCC-00017 ERCC-00073 ERCC-00081 ERCC-00104 ERCC-00109 ERCC-00123 ERCC-00134 ERCC-00137 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.386935 7.4678 7.646831 7.440734 7.598399 7.482119 Disp = 0.00418 , BCV = 0.0647 Disp = 0.00419 , BCV = 0.0647 Finished DE testing Finished examining dispersions Threshold P-value 0.05686497 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.524 21 23 1:1.5 0.567 18 23 1:2 0.530 20 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for IRF2BP2
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "MYC" [1] "AAAVS1" Feature MYC_1 MYC_2 MYC_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 2465 1701 1535 1620 1840 1729 3 SCYL3 846 672 603 430 460 437 4 C1orf112 1031 755 676 949 1277 1032 5 FGR 8556 6387 5955 2323 2401 2230 6 CFH 5 1 2 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.MYC.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 17015 transcripts remain for analysis. A total of 11 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00098 ERCC-00117 ERCC-00142 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2374 1836.5 1790.5 1643 1913.5 1714 Check for sample mRNA fraction differences(r_m)... Number of ERCC Controls Used in r_m estimate 81 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00134 ERCC-00147 ERCC-00013 ERCC-00077 ERCC-00058 ERCC-00069 ERCC-00033 ERCC-00039 ERCC-00143 ERCC-00154 ERCC-00028 ERCC-00054 ERCC-00085 ERCC-00160 ERCC-00148 ERCC-00157 ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00170 ERCC-00144 ERCC-00019 ERCC-00099 ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00079 ERCC-00062 ERCC-00095 ERCC-00165 ERCC-00131 ERCC-00035 ERCC-00044 ERCC-00112 ERCC-00076 ERCC-00092 ERCC-00022 ERCC-00042 ERCC-00111 ERCC-00043 ERCC-00116 ERCC-00108 ERCC-00145 ERCC-00003 ERCC-00136 ERCC-00171 ERCC-00046 ERCC-00004 ERCC-00113 ERCC-00074 ERCC-00096 ERCC-00002 ERCC-00130 GLM log(r_m) estimate: -1.149172 GLM log(r_m) estimate weighted s.e.: 0.8782513 Number of ERCCs in Mix 1 dyn range: 81 Number of ERCCs in Mix 2 dyn range: 81 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00041 ERCC-00017 
ERCC-00073 ERCC-00081 ERCC-00086 ERCC-00104 ERCC-00109 ERCC-00123 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00156 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.772332 7.515617 7.49025 7.404279 7.556689 7.446585 Disp = 0.00415 , BCV = 0.0645 Disp = 0.00416 , BCV = 0.0645 Finished DE testing Finished examining dispersions Threshold P-value 0.1420395 Threshold P-value is high for the chosen FDR of 0.05 The sample comparison indicates a large amount of differential expression in the measured transcript populations Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.543 22 23 1:1.5 0.594 18 23 1:2 0.562 21 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for MYC
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "LMO2" [1] "AAAVS1" Feature LMO2_1 LMO2_2 LMO2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1907 2199 2141 1620 1840 1729 3 SCYL3 561 592 644 430 460 437 4 C1orf112 1229 1188 1285 949 1277 1032 5 FGR 2777 3265 2969 2323 2401 2230 6 CFH 13 8 10 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.LMO2.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16882 transcripts remain for analysis. A total of 20 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00117 ERCC-00123 ERCC-00134 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2221.75 2325 2312.5 1662 1942.5 1733 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 72 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00085 ERCC-00039 ERCC-00054 ERCC-00170 ERCC-00144 ERCC-00160 ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00042 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00096 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.4273754 GLM log(r_m) estimate weighted s.e.: 0.8636267 Number of ERCCs in Mix 1 dyn range: 72 Number of ERCCs in Mix 2 dyn range: 72 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00120 ERCC-00137 ERCC-00158 ERCC-00164 ERCC-00168 ERCC-00073 ERCC-00109 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.70605 7.751475 7.746084 7.415777 7.571731 7.457609 Disp = 0.00329 , BCV = 0.0574 Disp = 0.00329 , BCV = 0.0574 Finished DE testing Finished examining dispersions Threshold P-value 0.003377844 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.494 18 23 1:1.5 0.472 18 23 1:2 0.491 18 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for LMO2
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
rm: cannot remove 'RNPv2.IKZF1.AAAVS1.All.Pvals.csv': No such file or directory [1] "IKZF1" [1] "AAAVS1" Feature IKZF1_1 IKZF1_2 IKZF1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1299 1529 2015 1620 1840 1729 3 SCYL3 361 406 571 430 460 437 4 C1orf112 836 967 1213 949 1277 1032 5 FGR 2082 1867 3154 2323 2401 2230 6 CFH 4 6 5 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.IKZF1.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16752 transcripts remain for analysis. A total of 22 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 1576.25 1526 2348.25 1677 1966.25 1753 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 70 Outlier ERCCs for GLM r_m Estimate: ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00058 ERCC-00085 ERCC-00039 ERCC-00054 ERCC-00170 ERCC-00144 ERCC-00019 ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.4183636 GLM log(r_m) estimate weighted s.e.: 0.8313124 Number of ERCCs in Mix 1 dyn range: 70 Number of ERCCs in Mix 2 dyn range: 70 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00031 ERCC-00040 ERCC-00073 ERCC-00097 ERCC-00134 ERCC-00158 ERCC-00164 ERCC-00168 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.362804 7.330405 7.761426 7.424762 7.583883 7.469084 Disp = 0.00644 , BCV = 0.0803 Disp = 0.00645 , BCV = 0.0803 Finished DE testing Finished examining dispersions Threshold P-value 0.00549604 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.471 19 23 1:1.5 0.454 18 23 1:2 0.460 16 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for IKZF1
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "MYBL2" [1] "AAAVS1" Feature MYBL2_1 MYBL2_2 MYBL2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1881 3921 1347 1620 1840 1729 3 SCYL3 469 1039 389 430 460 437 4 C1orf112 1108 2192 863 949 1277 1032 5 FGR 2573 5804 2117 2323 2401 2230 6 CFH 18 18 8 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.MYBL2.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 17053 transcripts remain for analysis. A total of 21 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 1865 3829 1543 1638 1906 1710 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 71 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00085 ERCC-00039 ERCC-00170 ERCC-00144 ERCC-00160 ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00131 ERCC-00078 ERCC-00071 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00096 ERCC-00002 GLM log(r_m) estimate: 0.620191 GLM log(r_m) estimate weighted s.e.: 0.8558226 Number of ERCCs in Mix 1 dyn range: 71 Number of ERCCs in Mix 2 dyn range: 71 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00013 ERCC-00031 ERCC-00073 ERCC-00077 ERCC-00097 ERCC-00120 ERCC-00134 ERCC-00147 ERCC-00158 ERCC-00168 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.531016 8.250359 7.341484 7.401231 7.552762 7.444249 Disp = 0.00431 , BCV = 0.0656 Disp = 0.00431 , BCV = 0.0656 Finished DE testing Finished examining dispersions Threshold P-value 0.0009369989 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.536 19 23 1:1.5 0.412 18 23 1:2 0.495 17 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for MYBL2
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "MEIS1" [1] "AAAVS1" Feature MEIS1_1 MEIS1_2 MEIS1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1916 2046 2726 1620 1840 1729 3 SCYL3 477 554 683 430 460 437 4 C1orf112 1121 1128 1408 949 1277 1032 5 FGR 1935 2193 2556 2323 2401 2230 6 CFH 7 3 12 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.MEIS1.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16907 transcripts remain for analysis. A total of 21 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00117 ERCC-00123 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2115 2194 2639.5 1658 1938 1730 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 71 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00013 ERCC-00077 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00085 ERCC-00069 ERCC-00039 ERCC-00054 ERCC-00170 ERCC-00144 ERCC-00160 ERCC-00157 ERCC-00019 ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00131 ERCC-00071 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.4541636 GLM log(r_m) estimate weighted s.e.: 0.8992704 Number of ERCCs in Mix 1 dyn range: 71 Number of ERCCs in Mix 2 dyn range: 71 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00097 ERCC-00164 ERCC-00168 ERCC-00073 ERCC-00109 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.65681 7.693482 7.878345 7.413367 7.569412 7.455877 Disp = 0.00363 , BCV = 0.0603 Disp = 0.00364 , BCV = 0.0603 Finished DE testing Finished examining dispersions Threshold P-value 0.00638019 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.429 18 23 1:1.5 0.586 18 23 1:2 0.605 17 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for MEIS1
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "IRF8" [1] "AAAVS1" Feature IRF8_1 IRF8_2 IRF8_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 2211 2243 2269 1620 1840 1729 3 SCYL3 611 621 622 430 460 437 4 C1orf112 1390 1268 1244 949 1277 1032 5 FGR 3652 3917 4442 2323 2401 2230 6 CFH 16 17 15 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.IRF8.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16800 transcripts remain for analysis. A total of 18 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00138 ERCC-00142 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2385 2327 2453 1672.25 1957.5 1744.25 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 74 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00058 ERCC-00039 ERCC-00085 ERCC-00160 ERCC-00170 ERCC-00144 ERCC-00157 ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.2633745 GLM log(r_m) estimate weighted s.e.: 0.8414289 Number of ERCCs in Mix 1 dyn range: 74 Number of ERCCs in Mix 2 dyn range: 74 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00012 ERCC-00013 ERCC-00134 ERCC-00137 ERCC-00164 ERCC-00168 ERCC-00073 ERCC-00156 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.776954 7.752335 7.805067 7.421925 7.579423 7.46408 Disp = 0.00426 , BCV = 0.0653 Disp = 0.00427 , BCV = 0.0654 Finished DE testing Finished examining dispersions Threshold P-value 0.02567527 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.453 20 23 1:1.5 0.533 19 23 1:2 0.608 18 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for IRF8
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "ELF2" [1] "AAAVS1" Feature ELF2_1 ELF2_2 ELF2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 2516 1913 1971 1620 1840 1729 3 SCYL3 640 486 584 430 460 437 4 C1orf112 1315 1056 1278 949 1277 1032 5 FGR 3206 2242 2711 2323 2401 2230 6 CFH 4 8 5 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.ELF2.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16904 transcripts remain for analysis. A total of 21 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2414.75 1863 2194 1658.5 1938.5 1731 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 71 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00058 ERCC-00069 ERCC-00085 ERCC-00039 ERCC-00170 ERCC-00144 ERCC-00160 ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00096 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.4388019 GLM log(r_m) estimate weighted s.e.: 0.8523992 Number of ERCCs in Mix 1 dyn range: 71 Number of ERCCs in Mix 2 dyn range: 71 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00040 ERCC-00073 ERCC-00120 ERCC-00123 ERCC-00164 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.789351 7.529943 7.693482 7.413669 7.56967 7.456455 Disp = 0.00472 , BCV = 0.0687 Disp = 0.00472 , BCV = 0.0687 Finished DE testing Finished examining dispersions Threshold P-value 0.0003092106 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.502 19 23 1:1.5 0.529 18 23 1:2 0.453 17 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for ELF2
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
rm: cannot remove 'RNPv2.SP1.AAAVS1.All.Pvals.csv': No such file or directory [1] "SP1" [1] "AAAVS1" Feature SP1_1 SP1_2 SP1_3 SP1_4 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 0 2 DPM1 1926 1846 1915 2633 1620 1840 1729 3 SCYL3 572 507 580 713 430 460 437 4 C1orf112 783 1088 1184 1572 949 1277 1032 5 FGR 2016 2285 2384 3106 2323 2401 2230 6 CFH 15 13 15 15 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.SP1.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16944 transcripts remain for analysis. A total of 15 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00117 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2097 1924 2233.5 2745 1653.25 1933 1726 Check for sample mRNA fraction differences(r_m)...
R[write to console]: Error in dimnames(x) <- dn : length of 'dimnames' [2] not equal to array extent Calls: <Anonymous> ... withVisible -> runDashboard -> est_r_m -> colnames<-
Error in dimnames(x) <- dn :
length of 'dimnames' [2] not equal to array extent
Calls: <Anonymous> ... withVisible -> runDashboard -> est_r_m -> colnames<-
Length Class Mode
1 character character
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob [1] "LYL1" [1] "AAAVS1" Feature LYL1_1 LYL1_2 LYL1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1954 1656 2061 1620 1840 1729 3 SCYL3 572 428 588 430 460 437 4 C1orf112 1241 952 1107 949 1277 1032 5 FGR 2786 2397 3052 2323 2401 2230 6 CFH 7 14 13 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.LYL1.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16829 transcripts remain for analysis. A total of 20 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2302 1853 2252 1669 1951 1743 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 72 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00085 ERCC-00039 ERCC-00054 ERCC-00170 ERCC-00160 ERCC-00144 ERCC-00157 ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00051 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00096 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.3354632 GLM log(r_m) estimate weighted s.e.: 0.8471587 Number of ERCCs in Mix 1 dyn range: 72 Number of ERCCs in Mix 2 dyn range: 72 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00097 ERCC-00134 ERCC-00168 ERCC-00073 ERCC-00123 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.741534 7.524561 7.719574 7.41998 7.576097 7.463363 Disp = 0.00315 , BCV = 0.0561 Disp = 0.00315 , BCV = 0.0562 Finished DE testing Finished examining dispersions Threshold P-value 0.006084115 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.491 20 23 1:1.5 0.503 18 23 1:2 0.578 17 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for LYL1
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "CEBPA" [1] "AAAVS1" Feature CEBPA_1 CEBPA_2 CEBPA_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1418 547 1781 1620 1840 1729 3 SCYL3 459 177 589 430 460 437 4 C1orf112 908 426 1171 949 1277 1032 5 FGR 1659 648 1791 2323 2401 2230 6 CFH 7 1 10 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.CEBPA.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16595 transcripts remain for analysis. A total of 22 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 1763 743 2081.5 1704 1993.5 1775 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 70 Outlier ERCCs for GLM r_m Estimate: ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00058 ERCC-00085 ERCC-00143 ERCC-00170 ERCC-00144 ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.2735703 GLM log(r_m) estimate weighted s.e.: 0.9241978 Number of ERCCs in Mix 1 dyn range: 70 Number of ERCCs in Mix 2 dyn range: 70 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00067 ERCC-00073 ERCC-00097 ERCC-00120 ERCC-00123 ERCC-00147 ERCC-00158 ERCC-00164 ERCC-00168 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.474772 6.610696 7.640844 7.440734 7.597647 7.481556 Disp = 0.00549 , BCV = 0.0741 Disp = 0.00549 , BCV = 0.0741 Finished DE testing Finished examining dispersions Threshold P-value 0.05536443 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.610 19 23 1:1.5 0.637 18 23 1:2 0.592 16 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for CEBPA
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "ZEB2" [1] "AAAVS1" Feature ZEB2_1 ZEB2_2 ZEB2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 2361 2261 1810 1620 1840 1729 3 SCYL3 531 527 481 430 460 437 4 C1orf112 1086 1059 945 949 1277 1032 5 FGR 2523 2566 2552 2323 2401 2230 6 CFH 1 1 0 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.ZEB2.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16877 transcripts remain for analysis. A total of 19 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00138 ERCC-00142 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2103 2164 2008 1663 1944 1734 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 73 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00154 ERCC-00028 ERCC-00058 ERCC-00039 ERCC-00143 ERCC-00085 ERCC-00054 ERCC-00160 ERCC-00170 ERCC-00144 ERCC-00014 ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00084 ERCC-00095 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.07284839 GLM log(r_m) estimate weighted s.e.: 0.9009232 Number of ERCCs in Mix 1 dyn range: 73 Number of ERCCs in Mix 2 dyn range: 73 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00031 ERCC-00041 ERCC-00097 ERCC-00120 ERCC-00156 ERCC-00158 ERCC-00164 ERCC-00073 ERCC-00134 ERCC-00137 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.65112 7.679714 7.604894 7.416378 7.572503 7.458186 Disp = 0.00416 , BCV = 0.0645 Disp = 0.00417 , BCV = 0.0646 Finished DE testing Finished examining dispersions Threshold P-value 0.06343123 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.435 20 23 1:1.5 0.559 18 23 1:2 0.644 18 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for ZEB2
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "MEF2D" [1] "AAAVS1" Feature MEF2D_1 MEF2D_2 MEF2D_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1983 2451 2378 1620 1840 1729 3 SCYL3 542 670 576 430 460 437 4 C1orf112 1163 1481 1332 949 1277 1032 5 FGR 3680 4706 4308 2323 2401 2230 6 CFH 17 12 14 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.MEF2D.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 17024 transcripts remain for analysis. A total of 17 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2150.25 2742.25 2546 1642 1913 1713 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 75 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00013 ERCC-00077 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00069 ERCC-00039 ERCC-00085 ERCC-00054 ERCC-00160 ERCC-00170 ERCC-00157 ERCC-00144 ERCC-00014 ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00060 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.1296358 GLM log(r_m) estimate weighted s.e.: 0.8698226 Number of ERCCs in Mix 1 dyn range: 75 Number of ERCCs in Mix 2 dyn range: 75 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00041 ERCC-00134 ERCC-00073 ERCC-00104 ERCC-00137 ERCC-00138 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.673339 7.916534 7.842279 7.40367 7.556428 7.446001 Disp = 0.0018 , BCV = 0.0424 Disp = 0.00181 , BCV = 0.0425 Finished DE testing Finished examining dispersions Threshold P-value 0.03637135 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.485 19 23 1:1.5 0.459 18 23 1:2 0.562 19 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for MEF2D
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "ZMYND8" [1] "AAAVS1" Feature ZMYND8_1 ZMYND8_2 ZMYND8_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 2140 1697 1859 1620 1840 1729 3 SCYL3 608 551 661 430 460 437 4 C1orf112 1311 1123 1319 949 1277 1032 5 FGR 4209 3864 4504 2323 2401 2230 6 CFH 8 6 7 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.ZMYND8.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 17092 transcripts remain for analysis. A total of 21 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2342 2038.25 2372 1633 1900.25 1707 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 71 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00039 ERCC-00085 ERCC-00160 ERCC-00170 ERCC-00144 ERCC-00019 ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00062 ERCC-00095 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.1990095 GLM log(r_m) estimate weighted s.e.: 0.8644413 Number of ERCCs in Mix 1 dyn range: 71 Number of ERCCs in Mix 2 dyn range: 71 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00040 ERCC-00120 ERCC-00134 ERCC-00168 ERCC-00073 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.758761 7.619847 7.771489 7.398174 7.549741 7.442493 Disp = 0.0041 , BCV = 0.064 Disp = 0.0041 , BCV = 0.0641 Finished DE testing Finished examining dispersions Threshold P-value 0.009284189 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.495 19 23 1:1.5 0.556 18 23 1:2 0.595 17 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for ZMYND8
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "RUNX1" [1] "AAAVS1" Feature RUNX1_1 RUNX1_2 RUNX1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1863 2093 2027 1620 1840 1729 3 SCYL3 577 617 601 430 460 437 4 C1orf112 1232 1209 1309 949 1277 1032 5 FGR 2359 2615 2258 2323 2401 2230 6 CFH 8 9 7 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.RUNX1.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 17128 transcripts remain for analysis. A total of 22 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2247.25 2328 2294.25 1629 1895 1703 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 70 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00069 ERCC-00085 ERCC-00160 ERCC-00170 ERCC-00144 ERCC-00157 ERCC-00014 ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00062 ERCC-00095 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00004 ERCC-00046 ERCC-00074 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.1358976 GLM log(r_m) estimate weighted s.e.: 0.8532505 Number of ERCCs in Mix 1 dyn range: 70 Number of ERCCs in Mix 2 dyn range: 70 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00031 ERCC-00097 ERCC-00120 ERCC-00168 ERCC-00073 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.717463 7.752765 7.738161 7.395722 7.546974 7.440147 Disp = 0.00256 , BCV = 0.0506 Disp = 0.00256 , BCV = 0.0506 Finished DE testing Finished examining dispersions Threshold P-value 0.02473796 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.559 18 23 1:1.5 0.539 18 23 1:2 0.564 17 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for RUNX1
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
rm: cannot remove 'RNPv2.FLI1.AAAVS1.All.Pvals.csv': No such file or directory [1] "FLI1" [1] "AAAVS1" Feature FLI1_1 FLI1_2 FLI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1892 2087 2588 1620 1840 1729 3 SCYL3 450 555 668 430 460 437 4 C1orf112 1196 1338 1591 949 1277 1032 5 FGR 2480 2602 3360 2323 2401 2230 6 CFH 3 3 4 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.FLI1.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16821 transcripts remain for analysis. A total of 21 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2055 2218 2616 1669 1953 1743 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 71 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00085 ERCC-00069 ERCC-00039 ERCC-00143 ERCC-00054 ERCC-00170 ERCC-00144 ERCC-00160 ERCC-00019 ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00131 ERCC-00071 ERCC-00079 ERCC-00165 ERCC-00035 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00096 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.487796 GLM log(r_m) estimate weighted s.e.: 0.840473 Number of ERCCs in Mix 1 dyn range: 71 Number of ERCCs in Mix 2 dyn range: 71 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00013 ERCC-00097 ERCC-00120 ERCC-00134 ERCC-00164 ERCC-00073 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.628031 7.704361 7.869402 7.41998 7.577122 7.463363 Disp = 0.00243 , BCV = 0.0493 Disp = 0.00243 , BCV = 0.0493 Finished DE testing Finished examining dispersions Threshold P-value 0.002016927 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.464 19 23 1:1.5 0.493 18 23 1:2 0.491 17 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for FLI1
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "HOXA9" [1] "AAAVS1" Feature HOXA9_1 HOXA9_2 HOXA9_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1842 2075 2081 1620 1840 1729 3 SCYL3 516 575 602 430 460 437 4 C1orf112 1174 1241 1190 949 1277 1032 5 FGR 2239 2364 2372 2323 2401 2230 6 CFH 4 10 8 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.HOXA9.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16777 transcripts remain for analysis. A total of 21 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2114 2247 2145 1675 1962 1750 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 71 Outlier ERCCs for GLM r_m Estimate: ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00058 ERCC-00085 ERCC-00039 ERCC-00170 ERCC-00144 ERCC-00160 ERCC-00019 ERCC-00014 ERCC-00059 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00162 ERCC-00131 ERCC-00078 ERCC-00071 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00035 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.7498464 GLM log(r_m) estimate weighted s.e.: 0.8841697 Number of ERCCs in Mix 1 dyn range: 71 Number of ERCCs in Mix 2 dyn range: 71 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00040 ERCC-00073 ERCC-00097 ERCC-00120 ERCC-00134 ERCC-00147 ERCC-00164 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.656337 7.717351 7.670895 7.423568 7.58172 7.467371 Disp = 0.00275 , BCV = 0.0525 Disp = 0.00276 , BCV = 0.0525 Finished DE testing Finished examining dispersions Threshold P-value 0.002819864 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.533 19 23 1:1.5 0.614 18 23 1:2 0.550 17 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for HOXA9
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
rm: cannot remove 'RNPv2.MYB.AAAVS1.All.Pvals.csv': No such file or directory [1] "MYB" [1] "AAAVS1" Feature MYB_1 MYB_2 MYB_3 MYB_4 MYB_5 MYB_6 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 0 0 0 2 DPM1 1695 1557 1288 1881 3921 1347 1620 1840 1729 3 SCYL3 582 482 460 469 1039 389 430 460 437 4 C1orf112 831 825 776 1108 2192 863 949 1277 1032 5 FGR 3674 3220 2807 2573 5804 2117 2323 2401 2230 6 CFH 10 17 11 18 18 8 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.MYB.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 17270 transcripts remain for analysis. A total of 21 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 1880.75 1759.75 1505 1834.75 3750.25 1514 1613.75 1874.75 1688.75 Check for sample mRNA fraction differences(r_m)...
R[write to console]: Error in dimnames(x) <- dn : length of 'dimnames' [2] not equal to array extent Calls: <Anonymous> ... withVisible -> runDashboard -> est_r_m -> colnames<-
Error in dimnames(x) <- dn :
length of 'dimnames' [2] not equal to array extent
Calls: <Anonymous> ... withVisible -> runDashboard -> est_r_m -> colnames<-
Length Class Mode
1 character character
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
Error in exDat$Figures : $ operator is invalid for atomic vectors Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob [1] "MAX" [1] "AAAVS1" Feature MAX_1 MAX_2 MAX_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1811 2032 2172 1620 1840 1729 3 SCYL3 571 656 742 430 460 437 4 C1orf112 1215 1387 1393 949 1277 1032 5 FGR 3640 4163 4084 2323 2401 2230 6 CFH 9 5 3 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.MAX.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16957 transcripts remain for analysis. A total of 15 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00117 ERCC-00138 ERCC-00142 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 2142 2502 2512 1651 1928 1725 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 77 Outlier ERCCs for GLM r_m Estimate: ERCC-00123 ERCC-00097 ERCC-00134 ERCC-00147 ERCC-00077 ERCC-00058 ERCC-00033 ERCC-00154 ERCC-00069 ERCC-00028 ERCC-00039 ERCC-00143 ERCC-00085 ERCC-00054 ERCC-00160 ERCC-00157 ERCC-00014 ERCC-00059 ERCC-00170 ERCC-00163 ERCC-00144 ERCC-00019 ERCC-00099 ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00062 ERCC-00095 ERCC-00071 ERCC-00079 ERCC-00131 ERCC-00165 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00092 ERCC-00022 ERCC-00111 ERCC-00043 ERCC-00116 ERCC-00108 ERCC-00145 ERCC-00003 ERCC-00136 ERCC-00171 ERCC-00046 ERCC-00004 ERCC-00113 ERCC-00074 ERCC-00096 ERCC-00002 ERCC-00130 GLM log(r_m) estimate: -0.4981285 GLM log(r_m) estimate weighted s.e.: 0.8587386 Number of ERCCs in Mix 1 dyn range: 77 Number of ERCCs in Mix 2 dyn range: 77 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00123 ERCC-00134 ERCC-00168 ERCC-00041 ERCC-00073 ERCC-00104 ERCC-00109 ERCC-00137 ERCC-00156 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.669495 7.824846 7.828835 7.409136 7.564238 7.452982 Disp = 0.0026 , BCV = 0.051 Disp = 0.0026 , BCV = 0.051 Finished DE testing Finished examining dispersions Threshold P-value 0.0321099 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.607 21 23 1:1.5 0.635 18 23 1:2 0.632 19 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for MAX
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
[1] "MEF2C" [1] "AAAVS1" Feature MEF2C_1 MEF2C_2 MEF2C_3 AAAVS1_1 AAAVS1_2 AAAVS1_3 1 TSPAN6 0 0 0 0 0 0 2 DPM1 1877 1951 1803 1620 1840 1729 3 SCYL3 459 498 519 430 460 437 4 C1orf112 1127 1049 1138 949 1277 1032 5 FGR 2652 3037 2824 2323 2401 2230 6 CFH 3 7 5 6 5 9 Initializing the exDat list structure... choseFDR = 0.05 repNormFactor is NULL Filename root is: RNPv2.MEF2C.AAAVS1 Transcripts were removed with a mean count < 1 or more than 2 replicates with 0 counts. Original data contained 26672 transcripts. After filtering 16818 transcripts remain for analysis. A total of 21 out of 92 ERCC controls were filtered from the data set The excluded ERCCs are: ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156 repNormFactor is NULL, Using Default Upper Quartile Normalization Method - 75th percentile normVec: 1959.75 2084 2098.75 1669.75 1953.75 1743.75 Check for sample mRNA fraction differences(r_m)... 
Number of ERCC Controls Used in r_m estimate 71 Outlier ERCCs for GLM r_m Estimate: ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00058 ERCC-00039 ERCC-00054 ERCC-00170 ERCC-00160 ERCC-00144 ERCC-00019 ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002 GLM log(r_m) estimate: 0.3739488 GLM log(r_m) estimate weighted s.e.: 0.8715334 Number of ERCCs in Mix 1 dyn range: 71 Number of ERCCs in Mix 2 dyn range: 71 These ERCCs were not included in the signal-abundance plot, because not enough non-zero replicate measurements of these controls were obtained for both samples: ERCC-00013 ERCC-00097 ERCC-00123 ERCC-00164 ERCC-00168 ERCC-00073 Saving dynRangePlot to exDat Starting differential expression tests Show log.offset 7.580572 7.642044 7.649097 7.420429 7.577506 7.463793 Disp = 0.00253 , BCV = 0.0503 Disp = 0.00253 , BCV = 0.0503 Finished DE testing Finished examining dispersions Threshold P-value 0.0009875534 Generating ROC curve and AUC statistics... Area Under the Curve (AUC) Results: Ratio AUC Detected Spiked 4:1 0.526 19 23 1:1.5 0.592 18 23 1:2 0.467 17 23 Estimating ERCC LODR ............................................. Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound 4:1 Inf <NA> <NA> 1:1.5 Inf <NA> <NA> 1:2 Inf <NA> <NA> Warning! Estimated distribution of p-values does not cross threshold p-value, may be due to insufficient data quantity Consider adjusting FDR choice. LODR estimates are available to code ratio-abundance plot Saving main dashboard plots to pdf file... Saving exDat list to .RData file... Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device) Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off
worked for MEF2C
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 12 -none- list
Figures 7 -none- list
%matplotlib inline
# Heatmap of the pairwise correlation between the columns of `data`.
# The correlation matrix is computed once and supplies its own tick labels, so
# the labels always line up with the plotted cells even if `corr()` leaves out
# columns that are present in `data.columns`.
sample_corr = data.corr()
fig, ax = plt.subplots(figsize=(10, 10))  # handle was bound to the typo'd name `ig`
sns.heatmap(sample_corr,
            xticklabels=sample_corr.columns,
            yticklabels=sample_corr.columns,
            ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x7f1f323aef10>
# Average-linkage agglomerative clustering (cosine affinity, 15 clusters) of the
# rows of the column-correlation matrix of `data`.
# NOTE(review): recent scikit-learn releases rename `affinity` to `metric`; the
# keyword is left as-is for the version this notebook targets -- confirm.
model = AgglomerativeClustering(n_clusters=15, linkage="average",
                                affinity="cosine", compute_full_tree=True)
labels = model.fit_predict(data.corr())

# Flatten the merge history into one record per internal node. scikit-learn
# numbers the leaves 0..n_samples-1 and the i-th merge n_samples + i, where
# n_samples is the number of clustered rows (len(labels)). The ids used to be
# offset by data.shape[0], the row count of the raw table, which does not match
# the matrix that was actually clustered.
ii = itertools.count(len(labels))
tree = [{'node_id': next(ii), 'left': left, 'right': right}
        for left, right in model.children_]

# Ordering of the clustered rows that puts members of the same cluster together.
sort = labels.argsort()
%matplotlib inline
# Clustered heatmap of the column correlations of `data`, written to a PDF.
sns.clustermap(
    data.corr(),
    figsize=(20, 20),
)
plt.savefig('../data/RNPv2_cluster_count.pdf')
# Column sums of `data`, converted to a plain Python list for display.
data.sum().tolist()
[31194860.27000039, 34734170.910000145, 41947063.61999977, 46794854.38000023, 45959725.04999988, 48187669.949999854, 43703179.22999995, 54815404.069999784, 51453432.84000005, 45694014.92000012, 37739408.16000016, 35925369.88000013, 45939275.84999983, 46049236.90999998, 47474159.87999978, 48525076.05999996, 45690646.539999746, 45157321.31999988, 56639651.62999975, 41764180.25999997, 53047868.079999454, 45963304.22999989, 42284214.549999595, 47507365.27999984, 43762796.11999972, 45382911.53999989, 46972864.209999934, 45345593.949999996, 37246793.10999977, 44768420.24999964, 42046067.34999971, 50800605.66999957, 51176436.25999986, 42939652.28999985, 44136137.289999895, 40740731.69999998, 38508207.550000004, 41500257.68999979, 41227894.83000014, 43337577.789999865, 43352847.28999995, 51316363.68999997, 40072110.34000017, 43282705.06999982, 51083598.04999976, 47140394.049999766, 37620883.43999992, 44039610.83999986, 61484638.129999965, 20045963.380000293, 38556072.189999774, 31634429.490000147, 29835972.010000307, 47235734.2699997, 34097279.28000006, 14896010.669999905, 40029165.88999997, 38726353.51999988, 37015620.039999984, 31655845.910000257, 37291884.63999993, 77020486.5900007, 76035190.80000074, 80821407.53000104, 88932208.80000061, 96200436.35000083, 33570457.16000021, 39525165.01000017, 35056555.59000006]
# (number of rows, number of columns) of `data`.
data.shape
(26580, 73)
# Display `data` (notebook preview).
data
| mr120-MV411-RNP_IRF2BP2-r4 | mr129-MV411-RNP_MYC-r4 | mr130-MV411-RNP_MYC-r5 | mr131-MV411-RNP_MYC-r6 | mr132-MV411-RNP_RUNX1-r4 | mr133-MV411-RNP_RUNX1-r5 | mr134-MV411-RNP_RUNX1-r6 | mr135-MV411-RNP_RUNX2-r4 | mr136-MV411-RNP_RUNX2-r5 | mr137-MV411-RNP_RUNX2-r6 | ... | mr186-MV411-RNP_AAVS1-r1 | mr187-MV411-RNP_AAVS1-r2 | mr188-MV411-RNP_AAVS1-r3 | mr126-MV411-RNP_MEF2D-r4 | mr189-MV411-RNP_SP1-r4 | mr190-MV411-RNP_SP1-r5 | mr191-MV411-RNP_SP1-r6 | mr192-MV411-RNP_SP1-r7 | mr127-MV411-RNP_MEF2D-r5 | mr128-MV411-RNP_MEF2D-r6 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| gene_id | |||||||||||||||||||||
| TSPAN6 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| DPM1 | 1619.00 | 2465.00 | 1701.00 | 1535.00 | 1863.00 | 2093.00 | 2027.00 | 2202.00 | 2148.00 | 2235.00 | ... | 1620.00 | 1840.00 | 1729.00 | 1983.00 | 1926.0 | 1846.00 | 1915.00 | 2633.00 | 2451.00 | 2378.00 |
| SCYL3 | 464.57 | 846.12 | 672.69 | 603.75 | 577.41 | 617.97 | 601.43 | 545.49 | 575.14 | 536.97 | ... | 430.78 | 460.04 | 437.36 | 542.42 | 572.5 | 507.48 | 580.49 | 713.56 | 670.02 | 576.38 |
| C1orf112 | 780.43 | 1031.90 | 755.31 | 676.25 | 1232.70 | 1209.00 | 1309.60 | 1370.50 | 1245.90 | 1257.10 | ... | 949.22 | 1277.00 | 1032.60 | 1163.60 | 783.5 | 1088.50 | 1184.50 | 1572.40 | 1481.00 | 1332.90 |
| FGR | 1443.00 | 8556.00 | 6387.00 | 5955.00 | 2359.00 | 2615.00 | 2258.00 | 3340.00 | 3229.00 | 3466.00 | ... | 2323.00 | 2401.00 | 2230.00 | 3680.00 | 2016.0 | 2285.00 | 2384.00 | 3106.00 | 4706.00 | 4308.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| BMP8B-AS1 | 3.00 | 2.00 | 2.00 | 4.00 | 10.00 | 9.00 | 9.00 | 8.00 | 4.00 | 7.00 | ... | 6.00 | 5.00 | 4.00 | 3.00 | 3.0 | 6.00 | 7.00 | 10.00 | 3.00 | 7.00 |
| H2AL1SP | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| NIPBL-DT | 462.00 | 650.00 | 478.00 | 431.00 | 777.00 | 829.00 | 782.00 | 709.00 | 743.00 | 776.00 | ... | 497.00 | 653.00 | 673.00 | 889.00 | 673.0 | 628.00 | 871.00 | 962.00 | 1099.00 | 1024.00 |
| CERNA2 | 2.00 | 7.00 | 8.00 | 3.00 | 13.00 | 6.00 | 24.00 | 9.00 | 8.00 | 12.00 | ... | 4.00 | 10.00 | 10.00 | 3.00 | 0.0 | 18.00 | 28.00 | 28.00 | 1.00 | 7.06 |
| LINC02689 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
26580 rows × 73 columns
# List the targets present in `data`: the third '-'-separated field of each
# sample column name (e.g. 'RNP_MYC'), without the AAVS1 control.
# 'gene_id' is skipped by name rather than by slicing off the last column:
# that column is only created two lines below, so on a first run
# `columns[:-1]` would silently ignore a real sample.
# Sorting gives a reproducible order (plain set order is not).
experiments = sorted(
    {name.split('-')[2] for name in data.columns if name != 'gene_id'}
)
# Still raises ValueError when the control is missing, as before.
experiments.remove("RNP_AAVS1")
# Copy the index into a 'gene_id' column (used as `gene_column` by the DESeq2
# wrapper call elsewhere in this notebook).
data['gene_id'] = data.index
experiments
['RNP_SPI1', 'RNP_RUNX2', 'RNP_GFI1', 'RNP_IRF2BP2', 'RNP_MYC', 'RNP_LMO2', 'RNP_IKZF1', 'RNP_MYBL2', 'RNP_MEIS1', 'RNP_IRF8', 'RNP_ELF2', 'RNP_SP1', 'RNP_LYL1', 'RNP_CEBPA', 'RNP_ZEB2', 'RNP_MEF2D', 'RNP_ZMYND8', 'RNP_RUNX1', 'RNP_FLI1', 'RNP_HOXA9', 'RNP_MYB', 'RNP_MAX', 'RNP_MEF2C']
# Differential expression of every target against the AAVS1 controls.
# NOTE(review): `results` must already exist as a dict; it is not created here.
sample_cols = [name for name in data.columns if name != 'gene_id']
# Target of each sample = third '-'-separated field of its column name.
sample_targets = [name.split('-')[2] for name in sample_cols]
is_control = [int(target == 'RNP_AAVS1') for target in sample_targets]
for val in experiments:
    # The target is compared exactly: a substring test (`val in name`) also
    # flags the RNP_MYBL2 samples when val is 'RNP_MYB'.
    design = pd.DataFrame(
        {
            'DMSO': is_control,
            'Target': [int(target == val) for target in sample_targets],
        },
        index=sample_cols,
        columns=['DMSO', 'Target'],
    )
    # Sample names use '.' instead of '-' in the design matrix index.
    design.index = design.index.astype(str).str.replace('-', '.')
    deseq = pyDESeq2.pyDESeq2(count_matrix=data, design_matrix=design,
                              design_formula='~DMSO + Target',
                              gene_column="gene_id")
    deseq.run_deseq()
    deseq.get_deseq_result()
    r = deseq.deseq_result
    # Missing statistics become "not significant, no change": p = 1, log2FC = 0.
    # The fill value has to be given as `nan=`; the second positional argument
    # of np.nan_to_num is `copy`, so NaN p-values were previously turned into 0.
    r.pvalue = np.nan_to_num(np.array(r.pvalue), nan=1.0)
    r.log2FoldChange = np.nan_to_num(np.array(r.log2FoldChange), nan=0.0)
    results[val] = r
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 211 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 209 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 203 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 208 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 213 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 205 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 213 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 208 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 209 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 208 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 211 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 353 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 209 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 210 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 155 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 209 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 202 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 208 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 209 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 207 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 152 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 208 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
3.2.6
R[write to console]: estimating size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 205 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
# Display the per-target DESeq2 result tables collected in `results`.
results
# One volcano plot per target, highlighting the genes listed in `ctf`.
for val in experiments:
    a = h.volcano(
        results[val],
        tohighlight=ctf,
        title=val,
        maxvalue=60,
        minlogfold=0.5,
        searchbox=True,
    )
    try:
        show(a)
    except RuntimeError:
        # Try once more when the first attempt raises a RuntimeError.
        show(a)
# Keep a reference to the full table, then work on a copy without one MYC
# replicate.
datad = data
data = data.drop(columns='mr129-MV411-RNP_MYC-r4')
# Integer colour code per target (third '-'-separated field of a column name);
# the last column is left out of every selection in this cell.
col = {
    v: i
    for i, v in enumerate({name.split('-')[2] for name in data.columns[:-1]})
}
# 2-component PCA with one row per sample column.
red = PCA(n_components=2).fit_transform(data[data.columns[:-1]].T)
h.scatter(
    red,
    labels=data.columns[:-1],
    radi=60000,
    colors=[col[name.split('-')[2]] for name in data.columns[:-1]],
)
# Reduce the sample columns to 30 principal components, then embed them in
# 2 dimensions with t-SNE (perplexity 4).
red = PCA(n_components=30).fit_transform(data[data.columns[:-1]].T)
red = TSNE(n_components=2, perplexity=4).fit_transform(red)
red.shape
(68, 2)
Sample mr129-MV411-RNP_MYC-r4 (MYC, replicate 4) looks like an outlier.
# Scatter plot of the current embedding `red`, coloured by target.
h.scatter(
    red,
    labels=data.columns[:-1],
    radi=10,
    colors=[col[name.split('-')[2]] for name in data.columns[:-1]],
)
# 20-component PCA with one row per sample column.
# The last column is excluded, as in the other PCA cells: transposing the
# whole table also passes the gene-name strings to PCA, which fails with
# "could not convert string to float".
pca = PCA(n_components=20)
red = pca.fit_transform(data[data.columns[:-1]].T)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-133-3ada5fe15c08> in <module> 1 pca = PCA(20) ----> 2 red = pca.fit_transform(data.T) ~/.local/lib/python3.7/site-packages/sklearn/decomposition/_pca.py in fit_transform(self, X, y) 367 C-ordered array, use 'np.ascontiguousarray'. 368 """ --> 369 U, S, V = self._fit(X) 370 U = U[:, :self.n_components_] 371 ~/.local/lib/python3.7/site-packages/sklearn/decomposition/_pca.py in _fit(self, X) 389 390 X = check_array(X, dtype=[np.float64, np.float32], ensure_2d=True, --> 391 copy=self.copy) 392 393 # Handle n_components==None ~/.local/lib/python3.7/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator) 529 array = array.astype(dtype, casting="unsafe", copy=False) 530 else: --> 531 array = np.asarray(array, order=order, dtype=dtype) 532 except ComplexWarning: 533 raise ValueError("Complex data not supported\n" ~/.local/lib/python3.7/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order) 83 84 """ ---> 85 return array(a, dtype, copy=False, order=order) 86 87 ValueError: could not convert string to float: 'TSPAN6'
# Fraction of the variance explained by each fitted principal component.
pca.explained_variance_ratio_
# Display `data` (notebook preview).
data
| mr120-MV411-RNP_IRF2BP2-r4 | mr129-MV411-RNP_MYC-r4 | mr130-MV411-RNP_MYC-r5 | mr131-MV411-RNP_MYC-r6 | mr132-MV411-RNP_RUNX1-r4 | mr133-MV411-RNP_RUNX1-r5 | mr134-MV411-RNP_RUNX1-r6 | mr135-MV411-RNP_RUNX2-r4 | mr136-MV411-RNP_RUNX2-r5 | mr137-MV411-RNP_RUNX2-r6 | ... | mr182-MV411-RNP_MYBL2-r3 | mr183-MV411-RNP_HOXA9-r4 | mr184-MV411-RNP_HOXA9-r5 | mr185-MV411-RNP_HOXA9-r6 | mr186-MV411-RNP_AAVS1-r1 | mr187-MV411-RNP_AAVS1-r2 | mr188-MV411-RNP_AAVS1-r3 | mr126-MV411-RNP_MEF2D-r4 | mr127-MV411-RNP_MEF2D-r5 | mr128-MV411-RNP_MEF2D-r6 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| 1 | 1619.00 | 2465.00 | 1701.00 | 1535.00 | 1863.00 | 2093.00 | 2027.00 | 2202.00 | 2148.00 | 2235.00 | ... | 3272.00 | 3686.00 | 3990.00 | 4714.0 | 1620.00 | 1840.00 | 1729.00 | 1983.00 | 2451.00 | 2378.00 |
| 2 | 464.57 | 846.12 | 672.69 | 603.75 | 577.41 | 617.97 | 601.43 | 545.49 | 575.14 | 536.97 | ... | 961.52 | 1024.20 | 1155.40 | 1316.6 | 430.78 | 460.04 | 437.36 | 542.42 | 670.02 | 576.38 |
| 3 | 780.43 | 1031.90 | 755.31 | 676.25 | 1232.70 | 1209.00 | 1309.60 | 1370.50 | 1245.90 | 1257.10 | ... | 1647.50 | 2260.80 | 2422.60 | 2757.4 | 949.22 | 1277.00 | 1032.60 | 1163.60 | 1481.00 | 1332.90 |
| 4 | 1443.00 | 8556.00 | 6387.00 | 5955.00 | 2359.00 | 2615.00 | 2258.00 | 3340.00 | 3229.00 | 3466.00 | ... | 4120.00 | 4514.00 | 4748.00 | 5478.0 | 2323.00 | 2401.00 | 2230.00 | 3680.00 | 4706.00 | 4308.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 38682 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| 38683 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| 38684 | 8.76 | 23.59 | 28.71 | 8.43 | 8.34 | 23.57 | 35.35 | 9.34 | 30.93 | 36.25 | ... | 8.43 | 5.75 | 30.07 | 22.7 | 9.09 | 9.78 | 9.26 | 31.53 | 16.19 | 5.61 |
| 38685 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| 38686 | 1.00 | 2.00 | 4.00 | 2.00 | 5.00 | 3.00 | 3.00 | 5.00 | 1.00 | 0.00 | ... | 2.00 | 2.00 | 3.00 | 4.0 | 0.00 | 0.00 | 1.00 | 0.00 | 1.00 | 4.00 |
38687 rows × 69 columns
# Empty container for the per-target GSEA results.
res = {}
# Go back to the table saved in `datad`.
data = datad
# Display `totest` (notebook preview).
totest
| mr123-MV411-RNP_IRF8-r4 | mr124-MV411-RNP_IRF8-r5 | mr125-MV411-RNP_IRF8-r6 | mr186-MV411-RNP_AAVS1-r1 | mr187-MV411-RNP_AAVS1-r2 | mr188-MV411-RNP_AAVS1-r3 | |
|---|---|---|---|---|---|---|
| 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| 1 | 2211.00 | 2243.00 | 2269.00 | 1620.00 | 1840.00 | 1729.00 |
| 2 | 611.42 | 621.91 | 622.93 | 430.78 | 460.04 | 437.36 |
| 3 | 1390.60 | 1268.10 | 1244.10 | 949.22 | 1277.00 | 1032.60 |
| 4 | 3652.00 | 3917.00 | 4442.00 | 2323.00 | 2401.00 | 2230.00 |
| ... | ... | ... | ... | ... | ... | ... |
| 38774 | 2.00 | 4.00 | 0.00 | 1.00 | 1.00 | 5.00 |
| 38775 | 165.00 | 119.00 | 130.00 | 93.00 | 139.00 | 87.00 |
| 38776 | 2.00 | 4.00 | 0.00 | 3.00 | 4.00 | 1.00 |
| 38777 | 51.00 | 52.00 | 31.00 | 41.00 | 56.00 | 33.00 |
| 38778 | 8976.00 | 7816.00 | 9319.00 | 7058.00 | 7576.00 | 5882.00 |
38779 rows × 6 columns
# Use 'gene_id' as the index; the column itself is removed (drop=True).
data = data.set_index('gene_id',drop=True)
# Look up the GSEA result stored for the current `val`.
res[val]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-168-edbe59d9522e> in <module> ----> 1 res[val] KeyError: 'RNP_IRF8'
# GSEA (WikiPathways 2013) of each target against the AAVS1 controls, with a
# bar plot of the first 25 rows of every result table.
for val in experiments:
    print(val)
    # Select samples by their parsed target (third '-'-separated field).
    # All columns are scanned instead of `columns[:-1]`, which loses the last
    # sample once 'gene_id' is no longer a column. The exact comparison keeps
    # 'RNP_MYB' from also matching the 'RNP_MYBL2' samples.
    totest = data[[v for v in data.columns
                   if v != 'gene_id' and v.split('-')[2] in (val, 'RNP_AAVS1')]]
    cls = ['Condition' if v.split('-')[2] == val else 'DMSO'
           for v in totest.columns]
    res[val] = gseapy.gsea(data=totest, gene_sets='WikiPathways_2013',
                           cls=cls, no_plot=False, processes=10)
    # Copy the gene-set names from the index into a 'Term' column for plotting.
    res[val].res2d['Term'] = list(res[val].res2d.index)
    # New figure per target so the bar plots are not drawn on top of each other.
    plt.figure()
    # NOTE(review): `hue_order` is given without `hue`; left unchanged.
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
RNP_IRF8 RNP_SPI1 RNP_FLI1 RNP_CEBPA RNP_MYC RNP_MYB RNP_ELF2 RNP_GFI1 RNP_RUNX1 RNP_IRF2BP2 RNP_MEF2D RNP_IKZF1 RNP_MEF2C RNP_LMO2 RNP_MYBL2 RNP_MAX RNP_ZMYND8 RNP_LYL1 RNP_HOXA9 RNP_RUNX2 RNP_ZEB2 RNP_MEIS1
# Write the WikiPathways GSEA results to disk.
with open('../data/wikipathway_RNPv2', 'wb') as f:
    pickle.dump(res, f)

# Read them back from the same file.
# NOTE(review): pickle should only be used on files you trust.
with open('../data/wikipathway_RNPv2', 'rb') as f:
    res = pickle.load(f)
# Re-plot the first 25 rows of each result table, one figure per target.
for i, val in enumerate(experiments):
    plt.figure(i)
    # Shorten the labels: drop the first 2 and the last 13 characters of each
    # gene-set name (presumably a fixed prefix/suffix; verify against the data).
    res[val].res2d.Term = [name[2:-13] for name in res[val].res2d.index]
    sns.barplot(
        data=res[val].res2d.iloc[:25],
        x="es",
        y="Term",
        hue_order="geneset_size",
    ).set_title(val)
/home/jeremie/.local/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
# Gather every gene-set name (result-table index) seen in any result.
a = set()
for val in res.values():
    a.update(set(val.res2d.index))
# One list per gene set with a zero-filled slot for each result.
a = {name: [0] * len(res) for name in a}
# Fill in the enrichment score ('es') of each gene set for each result.
for n, (k, val) in enumerate(res.items()):
    for i, v in val.res2d.iterrows():
        a[i][n] = v.es
# `res` now becomes a table: one row per result key, one column per gene set.
res = pd.DataFrame(a, index=res.keys())
# Heatmap of the enrichment-score table `res`.
fig, ax = plt.subplots(figsize=(20, 15))
sns.heatmap(data=res, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x7fb2cdbb0590>
# Cluster the rows of `res` (average linkage, cosine affinity, 6 clusters)
# and plot them ordered by cluster label.
model = AgglomerativeClustering(
    n_clusters=6,
    linkage="average",
    affinity="cosine",
    compute_full_tree=True,
)
labels = model.fit_predict(res)
# Merge nodes are numbered after the res.shape[0] clustered rows.
ii = itertools.count(res.shape[0])
tree = [
    {'node_id': node_id, 'left': left, 'right': right}
    for (left, right), node_id in zip(model.children_, ii)
]
sort = labels.argsort()
# plotCorrelationMatrix is a function of the JKBio Helper module (imported as
# `h`); calling it by its bare name raised a NameError.
a = h.plotCorrelationMatrix(res.values[sort], res.index[sort].tolist(),
                            interactive=True)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-175-18b55e342942> in <module> 5 tree = [{'node_id': next(ii), 'left': x[0], 'right':x[1]} for x in model.children_] 6 sort = labels.argsort() ----> 7 a = plotCorrelationMatrix(res.values[sort],res.index[sort].tolist(),interactive=True) NameError: name 'plotCorrelationMatrix' is not defined
# Heatmap of the enrichment-score table `res`.
fig, ax = plt.subplots(figsize=(20, 15))
sns.heatmap(data=res, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x7fb2cc92bc10>
# Save the current `fig` as a PNG.
fig.savefig("enriched_terms.png")
# Show the bokeh plot held in `a`.
show(a)
# (A stray `fi` token stood here; it is an undefined name and was removed.)
# Inspect the target list and the data table.
experiments
data
# GSEA (GO Biological Process 2015) of each target against the AAVS1
# controls, with one bar-plot figure per target.
res = {}
for i, val in enumerate(experiments):
    print(val)
    # Select samples by their parsed target (third '-'-separated field).
    # All columns are scanned instead of `columns[:-1]`, which loses the last
    # sample once 'gene_id' is no longer a column. The exact comparison keeps
    # 'RNP_MYB' from also matching the 'RNP_MYBL2' samples.
    totest = data[[v for v in data.columns
                   if v != 'gene_id' and v.split('-')[2] in (val, 'RNP_AAVS1')]]
    cls = ['Condition' if v.split('-')[2] == val else 'DMSO'
           for v in totest.columns]
    res[val] = gseapy.gsea(data=totest, gene_sets='GO_Biological_Process_2015',
                           cls=cls, no_plot=False, processes=14)
    # Copy the gene-set names from the index into a 'Term' column for plotting.
    res[val].res2d['Term'] = list(res[val].res2d.index)
    plt.figure(i)
    # NOTE(review): `hue_order` is given without `hue`; left unchanged.
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
RNP_IRF8 RNP_SPI1 RNP_FLI1 RNP_CEBPA RNP_MYC RNP_MYB RNP_ELF2 RNP_GFI1 RNP_RUNX1 RNP_IRF2BP2 RNP_MEF2D RNP_IKZF1 RNP_MEF2C RNP_LMO2 RNP_MYBL2 RNP_MAX RNP_ZMYND8 RNP_LYL1 RNP_HOXA9 RNP_RUNX2 RNP_ZEB2
/home/jeremie/.local/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
RNP_MEIS1
# Write the GO Biological Process GSEA results to disk.
with open('../data/GO_Biological_Process_2015_RNPv2', 'wb') as f:
    pickle.dump(res, f)
# Read them back from the file written above. The previous read path
# ('GO_Biological_Process_2015') did not match the file that was saved.
# NOTE(review): pickle should only be used on files you trust.
with open('../data/GO_Biological_Process_2015_RNPv2', 'rb') as f:
    res = pickle.load(f)
Creating the enrichment-score matrices (one row per experiment, one column per term)
# Gather every 'Term' value seen in any result table.
a = set()
for val in res.values():
    a.update(set(val.res2d.Term))
# One list per term with a zero-filled slot for each result.
a = {term: [0] * len(res) for term in a}
# Fill in the enrichment score ('es') of each term for each result.
for n, (k, val) in enumerate(res.items()):
    for i, v in val.res2d.iterrows():
        a[v.Term][n] = v.es
# `res` now becomes a table: one row per result key, one column per term.
res = pd.DataFrame(a, index=res.keys())
# Heatmap of the enrichment-score table `res`.
fig, ax = plt.subplots(figsize=(20, 15))
sns.heatmap(data=res, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x7fb2674cd8d0>
# Cluster the rows of `res` (average linkage, cosine affinity, 8 clusters)
# and plot them ordered by cluster label.
model = AgglomerativeClustering(
    n_clusters=8,
    linkage="average",
    affinity="cosine",
    compute_full_tree=True,
)
labels = model.fit_predict(res)
# Merge nodes are numbered after the res.shape[0] clustered rows.
ii = itertools.count(res.shape[0])
tree = [
    {'node_id': node_id, 'left': left, 'right': right}
    for (left, right), node_id in zip(model.children_, ii)
]
sort = labels.argsort()
a = h.plotCorrelationMatrix(
    res.values[sort],
    res.index[sort].tolist(),
    interactive=True,
    title="RNP2_bioproc_corr",
)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('alphas', 484), ('colors', 484), ('data', 22), ('xname', 484), ('yname', 484)
/home/jeremie/.local/lib/python3.7/site-packages/bokeh/io/saving.py:126: UserWarning: save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN
warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
/home/jeremie/.local/lib/python3.7/site-packages/bokeh/io/saving.py:139: UserWarning: save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'
warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) <ipython-input-207-d342427fa33f> in <module> ----> 1 a = h.plotCorrelationMatrix(res.values[sort],res.index[sort].tolist(),interactive=True,title="RNP2_bioproc_corr") ~/JKBio/Helper.py in plotCorrelationMatrix(data, names, colors, title, dataIsCorr, invert, size, interactive, rangeto) 390 except: 391 show(p) --> 392 save(p, title + '.html') 393 394 return p # show the plot ~/.local/lib/python3.7/site-packages/bokeh/io/saving.py in save(obj, filename, resources, title, template, state, **kwargs) 84 85 filename, resources, title = _get_save_args(state, filename, resources, title) ---> 86 _save_helper(obj, filename, resources, title, template) 87 return abspath(filename) 88 ~/.local/lib/python3.7/site-packages/bokeh/io/saving.py in _save_helper(obj, filename, resources, title, template) 146 ''' 147 from ..embed import file_html --> 148 html = file_html(obj, resources, title=title, template=template) 149 150 with io.open(filename, mode="w", encoding="utf-8") as f: ~/.local/lib/python3.7/site-packages/bokeh/embed/standalone.py in file_html(models, resources, title, template, template_variables, theme, suppress_callback_warning, _always_new) 288 models = models.roots 289 --> 290 with OutputDocumentFor(models, apply_theme=theme, always_new=_always_new) as doc: 291 (docs_json, render_items) = standalone_docs_json_and_render_items(models, suppress_callback_warning=suppress_callback_warning) 292 title = _title_from_models(models, title) /usr/lib/python3.7/contextlib.py in __enter__(self) 110 del self.args, self.kwds, self.func 111 try: --> 112 return next(self.gen) 113 except StopIteration: 114 raise RuntimeError("generator didn't yield") from None ~/.local/lib/python3.7/site-packages/bokeh/embed/util.py in OutputDocumentFor(objs, apply_theme, always_new) 136 doc = Document() 137 for model in objs: --> 138 doc.add_root(model) 139 140 # handle a single 
shared document ~/.local/lib/python3.7/site-packages/bokeh/document/document.py in add_root(self, model, setter) 302 self._roots.append(model) 303 finally: --> 304 self._pop_all_models_freeze() 305 self._trigger_on_change(RootAddedEvent(self, model, setter)) 306 ~/.local/lib/python3.7/site-packages/bokeh/document/document.py in _pop_all_models_freeze(self) 1017 self._all_models_freeze_count -= 1 1018 if self._all_models_freeze_count == 0: -> 1019 self._recompute_all_models() 1020 1021 def _recompute_all_models(self): ~/.local/lib/python3.7/site-packages/bokeh/document/document.py in _recompute_all_models(self) 1040 d._detach_document() 1041 for a in to_attach: -> 1042 a._attach_document(self) 1043 self._all_models = recomputed 1044 self._all_models_by_name = recomputed_by_name ~/.local/lib/python3.7/site-packages/bokeh/model.py in _attach_document(self, doc) 725 ''' 726 if self._document is not None and self._document is not doc: --> 727 raise RuntimeError("Models must be owned by only a single document, %r is already in a doc" % (self)) 728 doc.theme.apply_to_model(self) 729 self._document = doc RuntimeError: Models must be owned by only a single document, Rect(id='43474', ...) is already in a doc
# Hand-defined gene groups; the names are used as row labels (see
# `res.loc[cluster2]`), so spelling and case (e.g. 'CEBPa') must match the
# index exactly.
# NOTE(review): 'MEF2C' is listed in both cluster1 and cluster3 — confirm
# that the overlap is intended.
cluster1 = [
    'LMO2', 'LYL1', 'MAX', 'MEF2C',
]
cluster2 = [
    'GFI1', 'FLI1', 'MYB', 'IKZF1', 'ELF2', 'CEBPa', 'MEIS1',
]
cluster3 = [
    'IRF2BP2', 'MEF2C', 'CDK6', 'MEF2D', 'IRF8', 'BRD4', 'MYC',
]
cluster4 = [
    'RUNX1', 'RUNX2', 'ZMYND8',
]
# Column-wise mean over the cluster2 rows of `res`, displayed in ascending
# order (axis and sort direction are the pandas defaults, spelled out).
res.loc[cluster2].mean(axis=0).sort_values(ascending=True)
# Bare list fragment of gene-set library names: there is no enclosing
# assignment here, so each line below only evaluates to a throwaway
# one-element tuple.
# NOTE(review): these look like Enrichr library names intended for gseapy —
# confirm, and wrap them in a named list before using them.
'GO_Molecular_Function_2015',
'GeneSigDB',
'ENCODE_TF_ChIP-seq_2014',
# Disabled entry, kept for reference:
#'Drug_Perturbations_from_GEO_2014',
'GO_Cellular_Component_2015',
'GO_Biological_Process_2015',
'PPI_Hub_Proteins',
'WikiPathways_2013',
'TF-LOF_Expression_from_GEO',
# MSigDB collections C2, C6 and H: http://software.broadinstitute.org/gsea/msigdb/annotate.jsp
# Max's CRC (NOTE(review): presumably "core regulatory circuitry" — confirm)
# Gene symbols of interest, kept in alphabetical order.
# NOTE(review): "ctf" presumably stands for core transcription factors —
# confirm. 'CEBPA' is upper-case here, while other cells use 'CEBPa'.
ctf = [
    'BRD4', 'CDK6', 'CEBPA', 'ELF2',
    'FLI1', 'GFI1', 'IKZF1', 'IRF2BP2',
    'IRF8', 'LMO2', 'LYL1', 'MAX',
    'MEF2C', 'MEF2D', 'MEIS1', 'MYB',
    'MYC', 'RUNX1', 'RUNX2', 'SPI1',
    'ZEB2', 'ZMYND8',
]
# Gather the log2 fold changes of every entry in `results` into one table.
# Each value of `results` must expose a `log2FoldChange` attribute; it is
# stored as the column named after its key.
deseq = pd.DataFrame()
for k, val in results.items():
    # The loop body must be indented; at column 0 this cell does not parse.
    deseq[k] = val.log2FoldChange

# Transpose so that each key of `results` becomes a row label, which is how
# later cells address the table (e.g. deseq.loc[['MYC', ...]]).
deseq = deseq.T
deseq
# Plot the rows of `deseq` in the order given by `sort`.
# The data is taken from `deseq.values[sort]` instead of re-using `a`: `a` is
# overwritten with the returned plot, so feeding it back in made this cell
# depend on stale notebook state and break when re-run. Values and labels
# now always come from the same frame and the same ordering.
a = plotCorrelationMatrix(
    deseq.values[sort],
    deseq.index[sort].tolist(),
    interactive=True,
)
# Rename 'CEBPA' to 'CEBPa', the spelling used for the row labels elsewhere
# (cluster2, deseq.loc[...]). The rename is done by value, in place: a fixed
# position such as ctf[11] points at 'MAX' in the alphabetical list and would
# silently overwrite the wrong gene. Re-running the cell is a no-op.
if 'CEBPA' in ctf:
    ctf[ctf.index('CEBPA')] = 'CEBPa'
ctf
Dropping ETV6, SP1, GSE1 and LDB1.
# Preview the rows of `deseq` for the retained genes.
# Each label appears exactly once: the earlier list named 'RUNX2' twice,
# which makes .loc return that row twice.
deseq.loc[[
    'MYC', 'MYB', 'SPI1', 'RUNX1', 'IRF2BP2', 'FLI1',
    'ELF2', 'ZEB2', 'GFI1', 'LMO2', 'CEBPa', 'MEF2D',
    'MEF2C', 'IRF8', 'MEIS1', 'RUNX2', 'ZMYND8',
]]
# Display the most recently built plot.
show(a)
# Subset of `deseq` restricted to the retained genes, in this row order.
deseq_ctf = deseq.loc[[
    'MYC', 'MYB', 'SPI1', 'RUNX1', 'IRF2BP2', 'FLI1',
    'ELF2', 'ZEB2', 'GFI1', 'LMO2', 'CEBPa', 'MEF2D',
    'MEF2C', 'IRF8', 'MEIS1', 'RUNX2', 'ZMYND8',
]]
# Average-linkage agglomerative clustering of the rows of `deseq_ctf` into
# 7 groups using cosine affinity.
# NOTE(review): newer scikit-learn releases renamed `affinity` to `metric` —
# check against the installed version before upgrading.
model = AgglomerativeClustering(
    n_clusters=7,
    linkage="average",
    affinity="cosine",
    compute_full_tree=True,
)
labels = model.fit_predict(deseq_ctf)

# Record every merge as a node whose id starts at the number of rows, i.e.
# directly after the ids of the individual rows.
ii = itertools.count(deseq_ctf.shape[0])
tree = [
    {'node_id': node_id, 'left': left, 'right': right}
    for (left, right), node_id in zip(model.children_, ii)
]

# Order rows by cluster label so members of one cluster sit next to each
# other, then plot values and labels in that same order.
sort = labels.argsort()
a = plotCorrelationMatrix(
    deseq_ctf.values[sort],
    deseq_ctf.index[sort].tolist(),
    interactive=True,
)
show(a)
# Same clustering as for the gene subset, applied to every row of `deseq`:
# average linkage, cosine affinity, 7 clusters.
# NOTE(review): newer scikit-learn releases renamed `affinity` to `metric` —
# check against the installed version before upgrading.
model = AgglomerativeClustering(
    n_clusters=7,
    linkage="average",
    affinity="cosine",
    compute_full_tree=True,
)
labels = model.fit_predict(deseq)

# Record every merge as a node whose id starts at the number of rows, i.e.
# directly after the ids of the individual rows.
ii = itertools.count(deseq.shape[0])
tree = [
    {'node_id': node_id, 'left': left, 'right': right}
    for (left, right), node_id in zip(model.children_, ii)
]

# Order rows by cluster label, then plot values and labels in that order.
sort = labels.argsort()
a = plotCorrelationMatrix(
    deseq.values[sort],
    deseq.index[sort].tolist(),
    interactive=True,
)
show(a)
t-SNE, PCA and clustering across TFs, CRC and most variable genes, both ways.